diff --git "a/job.json" "b/job.json" deleted file mode 100644--- "a/job.json" +++ /dev/null @@ -1,200738 +0,0 @@ -{ - "in_dir": "/home/raven/exllamav2/models/Iambe-RP-cDPO-20b", - "out_dir": "/home/raven/exllamav2/cdpoquants", - "cal_dataset": "/home/raven/exllamav2/parquet/wikitext-test.parquet", - "dataset_rows": 100, - "measurement_rows": 16, - "gpu_rows": 100, - "length": 2560, - "measurement_length": 2560, - "bits": 3.0, - "head_bits": 6, - "progress": "finished", - "shard_size": 1024.0, - "output_measurement": null, - "compile_full": null, - "measurement": [ - { - "key": "model.layers.0.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.017855124548077583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.017693933099508286, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.00822196900844574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.00818557944148779, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.008147649466991425, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.004973818548023701, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.025207163766026497, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.017565717920660973, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.008165284991264343, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.008124261163175106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.008577357977628708, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.010064829140901566, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.00811342615634203, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.005650249309837818, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.004953920841217041, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.006031223107129335, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.004946481436491013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.00469511142000556, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.004945348482578993, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.004693333059549332, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.005191183649003506, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.004944662097841501, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.004923863802105188, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00469222804531455, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.017855124548077583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.017855124548077583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.018269823864102364, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.018016638234257698, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.008654029108583927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.00858929380774498, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.008523904718458652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0040773325599730015, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.02252083271741867, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.017843613401055336, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.008560837246477604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.008477216586470604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.00876569002866745, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.009265399537980556, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.008458874188363552, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.005131629761308432, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.0040254732593894005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.005095046479254961, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.004002923145890236, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0035677538253366947, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0039987098425626755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0035625773016363382, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.003681379836052656, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0039975508116185665, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.003099101595580578, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0035592056810855865, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.018269823864102364, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.018269823864102364, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.06040062755346298, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.05015246197581291, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.039811912924051285, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.03374486789107323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.02700021117925644, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.019327906891703606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.04943128302693367, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.038508959114551544, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0287610050290823, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.022134624421596527, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.022930754348635674, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02622988447546959, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.018830332905054092, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.013481411151587963, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.011817384511232376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013292579911649227, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.007597919087857008, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0068505811505019665, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.006747394800186157, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0057624452747404575, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006899196654558182, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006222137250006199, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0040918197482824326, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004550553858280182, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.06040062755346298, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.06040062755346298, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.022998467087745667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.013817272149026394, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.00843335036188364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.009623706340789795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.00929997768253088, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.00454730074852705, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.014596134424209595, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.012763324193656445, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.01063691545277834, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0064912112429738045, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.00692259194329381, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0073844268918037415, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.00634006317704916, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.004910989198833704, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.004520855378359556, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.003902910277247429, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0032670069485902786, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0031557080801576376, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0029163763392716646, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0027198975440114737, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0024607335217297077, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0029004737734794617, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.002081361599266529, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.002571784658357501, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.022998467087745667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.022998467087745667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.06359121948480606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.05922049656510353, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.05777737870812416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.05232781171798706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.028657449409365654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.027243809774518013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.03216307610273361, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.02968679554760456, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.029107967391610146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.025888554751873016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.024637194350361824, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.016377264633774757, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.014359693042933941, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.013894416391849518, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.013789449818432331, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.008266951888799667, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.007634526584297419, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.007598332595080137, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.0071376594714820385, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.007072584703564644, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.004655889701098204, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.005278386641293764, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.004504258278757334, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.004221218638122082, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.06359121948480606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.06359121948480606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.09644634276628494, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.08990646153688431, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.08782470226287842, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.07959549129009247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.043409839272499084, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.04131804406642914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.04857543855905533, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.04485626891255379, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.044071607291698456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.03918606415390968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.03729763627052307, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.024593854323029518, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.021335581317543983, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.020711863413453102, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.02056281827390194, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.012292674742639065, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.010800695046782494, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.010752201080322266, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.009985068812966347, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.009890494868159294, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.0065513127483427525, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.006770749110728502, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.006327957380563021, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.0047646271996200085, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.09644634276628494, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.09644634276628494, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.0.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.05551573261618614, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.04530042037367821, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.03210851550102234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.02871548756957054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.02271902561187744, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.016091329976916313, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.04043842479586601, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.03707321360707283, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.02465587481856346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.01877368614077568, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.02048463560640812, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.018251745030283928, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.016118982806801796, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.013685623183846474, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.010927301831543446, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.01218467578291893, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.00797371193766594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.0062966072000563145, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.007431205362081528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.005427424795925617, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.0069187358021736145, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.007110525853931904, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.004496191628277302, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.004580639768391848, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.05551573261618614, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.05551573261618614, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.014112050645053387, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.011084234341979027, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.008546131663024426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0074958642944693565, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.006011510733515024, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.004070105962455273, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.010666334070265293, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.008615447208285332, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.006575222592800856, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.004869508091360331, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.005144449882209301, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.005649432074278593, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.004210304003208876, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0029896325431764126, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.002615596866235137, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.002895254408940673, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.001693784142844379, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0015289544826373458, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0014984692679718137, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0012665623798966408, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0015472315717488527, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.001394278253428638, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.000979720614850521, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0010132344905287027, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.014112050645053387, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.014112050645053387, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.013583794236183167, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.01054299809038639, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.008074230514466763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.007088701240718365, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.005690825171768665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0038598307874053717, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.009398390538990498, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.008215050213038921, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.006270338781177998, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.00456771207973361, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.004673483781516552, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.004783657845109701, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.003942121285945177, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0028183883987367153, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.002482924610376358, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0024161862675100565, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0015891448128968477, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.001450429786927998, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0013949720887467265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0011906711151823401, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.001305583631619811, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.001295721740461886, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.000901087187230587, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0009462154703214765, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.013583794236183167, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.013583794236183167, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.08184764534235, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.06851489096879959, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.06266964972019196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.05384159833192825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.03668026626110077, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.03097805380821228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0460994616150856, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04210059717297554, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.03864806145429611, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.02940521016716957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.027651125565171242, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.023612502962350845, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.020260872319340706, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.017757155001163483, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.017125684767961502, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.011849187314510345, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.009412764571607113, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.009183471091091633, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.008016598410904408, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.007593152113258839, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006258389912545681, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006408046931028366, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.005383521318435669, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004494069144129753, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.08184764534235, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.08184764534235, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.15103895962238312, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09421385079622269, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.06609310954809189, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.06564125418663025, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06456640362739563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.03794488683342934, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.09025315940380096, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.08050718158483505, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.07231876999139786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.041107892990112305, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04343434423208237, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.04674019664525986, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03992302715778351, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03282904252409935, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.030933553352952003, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.024018066003918648, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01965528167784214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.019172238186001778, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.016089707612991333, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014935513027012348, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.013958022929728031, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015937548130750656, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.011805696412920952, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.013354935683310032, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09421385079622269, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09421385079622269, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.11017551273107529, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.10433200001716614, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1025719940662384, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.09368858486413956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.04924049973487854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.047539595514535904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.054535094648599625, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.050255678594112396, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.04974411427974701, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.04535282030701637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.04357711225748062, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.02773919887840748, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.024058885872364044, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.02361675165593624, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.02351406216621399, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.01393929310142994, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.012680013664066792, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.012650415301322937, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.011974196881055832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01191637385636568, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.007754679769277573, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.00839032232761383, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.0076124840416014194, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.006469635758548975, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.11017551273107529, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.11017551273107529, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.13614021241664886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.12920871376991272, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.12720508873462677, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.11659260839223862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.06099899113178253, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.05896104872226715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.06742070615291595, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.06219139322638512, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.06159619614481926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.05632127821445465, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.05403679236769676, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.033917903900146484, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.02939658984541893, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.028879471123218536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.02875673584640026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.016894621774554253, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.014708178117871284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.014676552265882492, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.013786649331450462, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.013710633851587772, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.008792576380074024, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.008661706931889057, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.008611748926341534, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.005536102689802647, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.11659260839223862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.11659260839223862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.1.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.12528935074806213, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.11213313788175583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.0980110764503479, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.08867865800857544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.05556869134306908, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.046183716505765915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.08001065254211426, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.07202697545289993, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.05732632800936699, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.04962759464979172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.048154883086681366, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.03921503573656082, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.03522174805402756, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.02748723514378071, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.025322752073407173, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.020824842154979706, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.015760106965899467, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.014524328522384167, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.014763204380869865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.013281376101076603, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.012729140929877758, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.012533956207334995, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.010560311377048492, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.00939914770424366, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.11213313788175583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.11213313788175583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.03315313160419464, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.030353432521224022, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.029242049902677536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.026186050847172737, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.01514621265232563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.01412622258067131, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.018259430304169655, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.016134465113282204, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.01545834168791771, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.013402162119746208, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.01296021044254303, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.009385320357978344, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.007766792085021734, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.007342520635575056, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.007239481434226036, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0047400337643921375, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0038355456199496984, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0037962915375828743, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.003500409424304962, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.003435769584029913, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0024985126219689846, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.002414118964225054, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.002360024955123663, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.001651102676987648, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.03315313160419464, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.03315313160419464, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.026102114468812943, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.02382536605000496, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.022813500836491585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0203483235090971, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.011791597120463848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.010903986170887947, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.014036240056157112, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.012735080905258656, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.012054999358952045, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.010364198125898838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.009827236644923687, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.007102865260094404, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.006074689328670502, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.005653788801282644, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.005551866255700588, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0035650695208460093, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0029507954604923725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.002909253118559718, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.002671679947525263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.00260711507871747, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0018964478513225913, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0018903465243056417, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0017428320134058595, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0013004831271246076, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.026102114468812943, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.026102114468812943, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12049976736307144, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.111831896007061, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10900012403726578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09831618517637253, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05545883625745773, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05245508253574371, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06250417232513428, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05750903859734535, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.056409601122140884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.049496475607156754, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04687770456075668, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.031699635088443756, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.027404412627220154, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02646632120013237, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02625182457268238, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015820302069187164, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013505401089787483, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013436607085168362, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012286515906453133, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01214319933205843, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008169716224074364, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008028018288314342, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007825551554560661, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005071786232292652, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.111831896007061, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.111831896007061, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.16094961762428284, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1285616159439087, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11638171970844269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09249689429998398, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.07208473235368729, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05951826646924019, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0904671922326088, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.08125880360603333, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.07662878185510635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.052872009575366974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04880505055189133, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.04675811156630516, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.039795346558094025, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.035718128085136414, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.034710999578237534, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.024007700383663177, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.020261084660887718, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.019994081929326057, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.016992075368762016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01635449007153511, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.014105567708611488, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015060456469655037, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.012911360710859299, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.012271999381482601, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11638171970844269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11638171970844269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.14549097418785095, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.13796983659267426, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.13578246533870697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.12474432587623596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.06651206314563751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0641942247748375, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.07370392978191376, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.06783703714609146, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.0672135129570961, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06128060817718506, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.05915132910013199, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.03751477226614952, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03240625560283661, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.031826768070459366, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03168432042002678, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.018802789971232414, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.016681624576449394, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.01664106920361519, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01567968912422657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.015595321543514729, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.010228574275970459, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.010499268770217896, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.010043030604720116, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.0075611937791109085, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.07370392978191376, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.07370392978191376, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17960932850837708, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1705435961484909, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16779999434947968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.15443375706672668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08234508335590363, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0794992595911026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09123457968235016, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.0839749202132225, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08323688060045242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0759010910987854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07322487235069275, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.046254146844148636, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03991567716002464, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.039201024919748306, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.039036016911268234, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.023093244060873985, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.020056437700986862, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020011013373732567, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.018771061673760414, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018666421994566917, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012187664397060871, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.011913559399545193, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011955950409173965, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.007759932894259691, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09123457968235016, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09123457968235016, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.2.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.1839911937713623, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.16512265801429749, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.15784457325935364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.14220228791236877, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.08322867751121521, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.07608504593372345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.09798524528741837, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.0899709090590477, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.0860389843583107, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.07284097373485565, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.06950671970844269, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.04993507266044617, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.043062351644039154, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.04002374783158302, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.039286013692617416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.0250809695571661, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.021046005189418793, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.020884564146399498, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.019029483199119568, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.018564781174063683, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.013565595261752605, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.013688812032341957, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.012585395015776157, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.009662440977990627, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.09798524528741837, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.09798524528741837, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.04724431410431862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.04320704936981201, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.04149910807609558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.037401266396045685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.021750018000602722, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.01994197629392147, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.025991855189204216, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.023163842037320137, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.022157028317451477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.01906537637114525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.01823834516108036, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.013360608369112015, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.011134722270071507, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.010521753691136837, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.010376746766269207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.006752819288522005, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.00549242552369833, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.005433529149740934, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.004916119854897261, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0048205070197582245, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0035830598790198565, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.003463496919721365, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.003387550823390484, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0023714127019047737, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.04724431410431862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.04724431410431862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.03729400783777237, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.03431804105639458, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.032925091683864594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.029523957520723343, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.017071545124053955, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.015836471691727638, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.020299497991800308, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.018431833013892174, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.017400771379470825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.015102656558156013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.01445687748491764, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.010304328985512257, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.00881131086498499, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.008213929831981659, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.00806785561144352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.005170144140720367, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0043151187710464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.004252829123288393, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.003932074178010225, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.003841764759272337, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0027643400244414806, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0027986459899693727, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0025620784144848585, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.001973734237253666, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.03729400783777237, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.03729400783777237, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1659296452999115, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.15449678897857666, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1507587730884552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.13618813455104828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.07695132493972778, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07290824502706528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.08719587326049805, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.07969281077384949, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.07821843773126602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.06878043711185455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0654534325003624, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.044272374361753464, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03801623731851578, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03677661344408989, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.03648320212960243, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02212032675743103, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01875445619225502, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.018660271540284157, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.017078902572393417, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.016885990276932716, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01146818045526743, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.011125290766358376, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.010995432734489441, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00700842821970582, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.08719587326049805, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.08719587326049805, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.14826415479183197, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12295010685920715, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10886794328689575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08867187052965164, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06653375923633575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05488405004143715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.09539812058210373, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.08143974840641022, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.07020910084247589, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05334276705980301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04910239577293396, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.04818575829267502, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04112677648663521, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03465203940868378, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.032909829169511795, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.025561682879924774, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.021810494363307953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.021185491234064102, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.019833818078041077, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.018908100202679634, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015704642981290817, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.018265245482325554, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.013639439828693867, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.015965096652507782, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10886794328689575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10886794328689575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1830281913280487, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.17293161153793335, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16985146701335907, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.15546685457229614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08549634367227554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08212248980998993, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09481705725193024, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08747061342000961, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08653458952903748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07808924466371536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07487861067056656, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.048656314611434937, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04226483777165413, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.0413915291428566, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.041199084371328354, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024418137967586517, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.0221974179148674, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.022141972556710243, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.020801441743969917, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.0206780843436718, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013481407426297665, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014657680876553059, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01320700440555811, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011234959587454796, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09481705725193024, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09481705725193024, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.21342290937900543, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.20171481370925903, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.19822347164154053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.18146736919879913, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09945257008075714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09551704674959183, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11033819615840912, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10173872858285904, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1006605252623558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09079638868570328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0870678722858429, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05631376430392265, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04862455278635025, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04760771989822388, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.047379132360219955, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.028150850906968117, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024621035903692245, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02455892041325569, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022889085114002228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.022740140557289124, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015026547014713287, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01502860989421606, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014692363329231739, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010249054059386253, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11033819615840912, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11033819615840912, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.3.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.04914315789937973, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.04448586329817772, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.024485085159540176, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.021591342985630035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.013994412496685982, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.012648195028305054, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.04262683168053627, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.037497617304325104, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.014596141874790192, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.011785289272665977, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.011137627065181732, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.008943906053900719, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.0077093858271837234, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.00690484931692481, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.007308472413569689, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.004605722147971392, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.004766628611832857, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.0038015530444681644, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.004408927634358406, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.003332664491608739, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.003956424072384834, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.003944658674299717, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.0024335444904863834, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.002210419625043869, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.04914315789937973, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.04914315789937973, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.061667028814554214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.05637658014893532, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.05421841889619827, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.048506852239370346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.02822963520884514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.026196768507361412, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.032898224890232086, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.03019033744931221, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.028859619051218033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.02479535900056362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.023520348593592644, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0166889987885952, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.014413680881261826, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.013538910076022148, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.013326957821846008, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.008345660753548145, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0070206886157393456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0069371722638607025, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.006338623818010092, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.006206520367413759, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0043762740679085255, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.004410919267684221, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.004079957026988268, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0029777796007692814, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.061667028814554214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.061667028814554214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.05358317494392395, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.048949629068374634, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.046991582959890366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.04204914718866348, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.024519875645637512, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.022699395194649696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.02878967672586441, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.026355121284723282, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.025079550221562386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.021534208208322525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.020493725314736366, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.014602596871554852, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.012567984871566296, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.01175656821578741, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.011561665683984756, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.007305691484361887, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.00608486495912075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.006005654111504555, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.005491225048899651, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.005364371929317713, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0038227466866374016, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.003817106131464243, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.003546598833054304, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.002543186768889427, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.05358317494392395, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.05358317494392395, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1746962070465088, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1619596779346466, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15776604413986206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.14229834079742432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.08128588646650314, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07671801000833511, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0919063612818718, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.08444763720035553, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.08275134861469269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07228422164916992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06852231919765472, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.046694133430719376, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04029037430882454, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.038867391645908356, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.038514092564582825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02331560291349888, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.019814053550362587, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.019708095118403435, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.017970062792301178, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01774834282696247, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.012032825499773026, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01177340466529131, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.011501750908792019, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.007400516886264086, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0919063612818718, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0919063612818718, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.18251216411590576, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.15854409337043762, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.14790299534797668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12410171329975128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0853881984949112, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0749022513628006, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10490844398736954, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09508104622364044, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.08850514888763428, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.06863369792699814, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06281014531850815, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05466391146183014, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04647724702954292, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.042000457644462585, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.040904827415943146, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.027742544189095497, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.023141315206885338, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02273591235280037, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020060153678059578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019368959590792656, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015739168971776962, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016491521149873734, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014355409890413284, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01285813469439745, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10490844398736954, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10490844398736954, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.19053472578525543, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.17935185134410858, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.17596890032291412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.16049443185329437, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08895751088857651, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0851336270570755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09871138632297516, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09117240458726883, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09016643464565277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08067523688077927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07697372138500214, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.050240762531757355, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04358299821615219, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.042583905160427094, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.042352985590696335, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.025093844160437584, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02195017598569393, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.021886806935071945, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.020279083400964737, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02013469859957695, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013183706440031528, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013298159465193748, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012850950472056866, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008920103311538696, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09871138632297516, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09871138632297516, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.22558577358722687, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21242807805538177, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.20852498710155487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19027552008628845, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10535068064928055, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10083509981632233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11686765402555466, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10792471468448639, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1067662462592125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09554529190063477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09117846935987473, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05938367545604706, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05147331580519676, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05030856281518936, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.050031471997499466, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.029608063399791718, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.025609485805034637, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.025533750653266907, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02359243668615818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.023416534066200256, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015313923358917236, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015016977675259113, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014919746667146683, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009413995780050755, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11686765402555466, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11686765402555466, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.4.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2326095700263977, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.20890545845031738, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.20026862621307373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.17829233407974243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.10714473575353622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09811653941869736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.12450958788394928, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11443693190813065, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11041547358036041, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09258716553449631, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08715962618589401, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06358064711093903, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.054877784103155136, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05157521739602089, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05077306553721428, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.031878165900707245, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.026947468519210815, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.026792820543050766, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.024075092747807503, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.0235618706792593, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.017110764980316162, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01719340868294239, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016046157106757164, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.011928608641028404, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11443693190813065, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11443693190813065, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07243998348712921, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.06626655161380768, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.06361710280179977, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.056892771273851395, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0332944393157959, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.03080212138593197, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.03887711092829704, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0358034148812294, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.034027229994535446, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.02923206053674221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.027645835652947426, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.019730357453227043, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.017111830413341522, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.016001923009753227, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.01572873443365097, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.009880264289677143, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.008356031961739063, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.008246447890996933, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.007553757168352604, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.007382084149867296, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.005207112990319729, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00534017663449049, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.004832091741263866, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0036870758049190044, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07243998348712921, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07243998348712921, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.06226812303066254, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.05690913647413254, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.05450456589460373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.048761069774627686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.02861650660634041, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.02642177604138851, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.03383326157927513, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.030950751155614853, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.029249995946884155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.02512214705348015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.023912720382213593, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.017184432595968246, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.01478860154747963, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.013744809664785862, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.013494341634213924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.00860010739415884, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.007145626004785299, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.007040313445031643, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0064501455053687096, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.006289334036409855, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.004518256988376379, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.004538676701486111, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.004171804059296846, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.003071109764277935, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.06226812303066254, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.06226812303066254, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1853746473789215, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.17170937359333038, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16719016432762146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15061797201633453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.08646444976329803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08144918829202652, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.09801161289215088, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0899355560541153, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.08802846074104309, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07673897594213486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07263737916946411, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0498601570725441, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04294854402542114, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04136892408132553, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04099206626415253, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02489074505865574, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.021097498014569283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02097860909998417, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.019104497507214546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.018861038610339165, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.012861317954957485, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.012565507553517818, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.012275115586817265, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.007907486520707607, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.09801161289215088, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.09801161289215088, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.18789565563201904, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.16546306014060974, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15728630125522614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.13191308081150055, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.08820947259664536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.079545758664608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10342478007078171, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09433363378047943, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0908333882689476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07192181795835495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06451553106307983, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05367299169301987, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04606454074382782, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04329945519566536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04263629764318466, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.027217809110879898, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.023740313947200775, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023527881130576134, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020737595856189728, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02032291889190674, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0153496777638793, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016541501507163048, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014511065557599068, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01299981027841568, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10342478007078171, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10342478007078171, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1763671189546585, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16571366786956787, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16238242387771606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14772802591323853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08269257843494415, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07899637520313263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09194625169038773, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08494357019662857, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08387379348278046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07469125092029572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07115642726421356, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.046946533024311066, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04070013761520386, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03970407694578171, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03946533054113388, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.023467570543289185, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.020634522661566734, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020571313798427582, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019037676975131035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01889154687523842, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012460676953196526, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012770039960741997, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012134081684052944, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008870871737599373, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09194625169038773, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09194625169038773, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23180197179317474, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21784083545207977, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2135988026857376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19442293047904968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10860481858253479, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10375391691923141, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12076292186975479, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11146453022956848, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11011612415313721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09812377393245697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09342765063047409, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06137024611234665, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.053214628249406815, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05191122740507126, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05160055309534073, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030641397461295128, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026415269821882248, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026331817731261253, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024255817756056786, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02405565045773983, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015885625034570694, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015500682406127453, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015441078692674637, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009691988117992878, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11146453022956848, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11146453022956848, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.5.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.23265472054481506, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2086765170097351, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.19994834065437317, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.17756593227386475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1072070375084877, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09807097911834717, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1248169094324112, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11479973047971725, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11059398949146271, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09254928678274155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08700612187385559, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06383217871189117, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.055132195353507996, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.051675714552402496, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05083820968866348, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.0320747010409832, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02712440863251686, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.026964690536260605, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.024254295974969864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.0237213633954525, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.017337951809167862, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.017484325915575027, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01624344475567341, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012307185679674149, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11479973047971725, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11479973047971725, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0868612602353096, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08028934895992279, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.07762614637613297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.06980173289775848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.040294695645570755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.03772316128015518, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0464106909930706, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0427042581140995, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04106827825307846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.03573520481586456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03385653719305992, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.023596586659550667, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.020437240600585938, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.019376112148165703, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.019124701619148254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01181600708514452, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010099586099386215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010000143200159073, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.009193726815283298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009032146073877811, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006239150185137987, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006382873747497797, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.005880147684365511, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004411332309246063, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0868612602353096, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0868612602353096, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07159629464149475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.06623396277427673, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.06393489986658096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.057521093636751175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.03323479741811752, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.031058529391884804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.038720253854990005, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.03540864959359169, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.033857595175504684, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.029518717899918556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.028110487386584282, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.019684607163071632, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.016933444887399673, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.015960734337568283, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.015724236145615578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.009855417534708977, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.008265375159680843, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.008168681524693966, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0075193122029304504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.007368756923824549, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.005170677788555622, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.005169066600501537, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0048398119397461414, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0034705493599176407, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07159629464149475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07159629464149475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.20244741439819336, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18831124901771545, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1835828423500061, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.16583585739135742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09471315145492554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0895538255572319, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10701289027929306, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09827598184347153, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09633971750736237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08443066477775574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07996706664562225, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05442546680569649, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.046920306980609894, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0453040674328804, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04491333290934563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.027147576212882996, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.023059744387865067, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.022937361150979996, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020932095125317574, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02068857103586197, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.013970019295811653, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01362527534365654, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.013357478193938732, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.008467503823339939, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10701289027929306, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10701289027929306, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.19160927832126617, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1603095680475235, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.14735227823257446, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12180043011903763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0894622802734375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07573191821575165, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10895392298698425, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09926792234182358, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0923440083861351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0685511901974678, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06095974147319794, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05649731680750847, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04831857234239578, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04383213445544243, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04273608699440956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028446905314922333, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024009404703974724, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023573899641633034, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020209476351737976, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019484378397464752, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015911495313048363, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016947828233242035, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014505153521895409, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.013105351477861404, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10895392298698425, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10895392298698425, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17690065503120422, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1659352332353592, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16253475844860077, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14783720672130585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08305329829454422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07915399223566055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09242825955152512, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08544935286045074, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08424661308526993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07485804706811905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07117180526256561, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.047081489115953445, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.040874287486076355, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03979554772377014, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.039537213742733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.023536067456007004, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.020484071224927902, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02041480876505375, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01882808282971382, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018665991723537445, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012333131395280361, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01240127719938755, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011971784755587578, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008250374346971512, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09242825955152512, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09242825955152512, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2356182038784027, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.221128910779953, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21668469905853271, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.197226881980896, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.1105782762169838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10544104874134064, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12316939979791641, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11366011947393417, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11218102276325226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09974069893360138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09488832205533981, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06265752017498016, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05429637432098389, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05289708077907562, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05256015807390213, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03129718825221062, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026949025690555573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026856884360313416, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02470744028687477, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024497568607330322, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01626473106443882, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01587304100394249, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015783943235874176, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009986676275730133, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11366011947393417, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11366011947393417, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.6.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2383609265089035, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.21207866072654724, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2021518349647522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1794668287038803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.10963331907987595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09938133507966995, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.12895582616329193, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11846236139535904, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11345323920249939, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09398376941680908, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.0885196104645729, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06594680994749069, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05691438913345337, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05293452739715576, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.051969435065984726, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03312014043331146, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.0279077161103487, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.027708547189831734, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02485588937997818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024237418547272682, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01792616955935955, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018223708495497704, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016662368550896645, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012958892621099949, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11345323920249939, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11345323920249939, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.08750366419553757, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08106645196676254, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.07846878468990326, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07054101675748825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.040588151663541794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.03804619610309601, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0467364527285099, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04300235956907272, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.041343580931425095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.036045659333467484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.034204211086034775, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.023761892691254616, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.020558064803481102, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.019483964890241623, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.019223764538764954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.011892333626747131, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010084151290357113, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.00997862033545971, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.009169756434857845, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009003566578030586, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006232656538486481, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006274599581956863, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.005865106359124184, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004211236257106066, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.08750366419553757, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.08750366419553757, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07370685040950775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.06835446506738663, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0659630224108696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.05933724716305733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.034223295748233795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.03199010714888573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.03977131471037865, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.03654276207089424, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.034838274121284485, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.030427362769842148, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.02894776314496994, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.020209213718771935, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.017469145357608795, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.01642468385398388, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.016171567142009735, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.010110774077475071, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.008494599722325802, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.008384970016777515, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.007730509620159864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.00756937637925148, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00529031315818429, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.005300209857523441, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.004941117949783802, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.003525087144225836, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07370685040950775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07370685040950775, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1973758190870285, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18376445770263672, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.17928050458431244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.16180278360843658, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09239010512828827, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08738450706005096, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1046617180109024, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09582136571407318, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09389082342386246, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08236578106880188, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07818964868783951, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05327805504202843, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04575872793793678, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04417327418923378, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04379468783736229, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02657349221408367, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.022499987855553627, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.022375252097845078, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020428750663995743, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.020182156935334206, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.013691515661776066, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.013310805894434452, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.013059782795608044, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.008298033848404884, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1046617180109024, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1046617180109024, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2106463760137558, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18198904395103455, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1716032773256302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.144483745098114, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09693208336830139, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08584100753068924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11534906923770905, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10561355203390121, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10112042725086212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07890329509973526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07132688909769058, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.059772785753011703, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05156584829092026, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04769601672887802, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04675063490867615, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03027699701488018, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02635580115020275, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02609487809240818, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02306496351957321, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02247891202569008, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017130544409155846, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01870669797062874, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015977997332811356, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014825657941401005, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11534906923770905, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11534906923770905, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.15618763864040375, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1464388221502304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.14334611594676971, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1303236037492752, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07340270280838013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0699065625667572, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08189860731363297, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07558106631040573, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.0744728296995163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06612329930067062, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06286916136741638, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.041756995022296906, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03621680662035942, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.035219576209783554, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.034987643361091614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02088865265250206, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.018227027729153633, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.018160462379455566, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01676180772483349, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.016615496948361397, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.011037260293960571, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.011179301887750626, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.010705593973398209, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00761294923722744, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08189860731363297, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08189860731363297, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.22752749919891357, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21349464356899261, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.20904728770256042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19022585451602936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10705835372209549, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.102053701877594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11932982504367828, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1101214662194252, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10864037275314331, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09649483859539032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09187084436416626, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06092312932014465, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05273300036787987, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.051337990909814835, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05101877078413963, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03043534606695175, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02650955319404602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026415083557367325, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02437729202210903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02417118102312088, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016034234315156937, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016155986115336418, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015567705035209656, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010881842114031315, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1101214662194252, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1101214662194252, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.7.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.22561609745025635, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.20131932199001312, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.19212447106838226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.17027249932289124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.10387442260980606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09438155591487885, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.12305822223424911, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11198507994413376, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.10735060274600983, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.08909430354833603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08386852592229843, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06258269399404526, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05400435999035835, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05038364231586456, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.04949319735169411, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.031667985022068024, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.026972340419888496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.026792364194989204, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.024158474057912827, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.023600058630108833, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.017598262056708336, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018093902617692947, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016450056806206703, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013450298458337784, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11198507994413376, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11198507994413376, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09504994004964828, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08820025622844696, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08537318557500839, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07684294879436493, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.044129688292741776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.041411664336919785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.050827015191316605, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04671443626284599, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.044898685067892075, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.039255112409591675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.037277620285749435, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.025821533054113388, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.022325169295072556, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.021158544346690178, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.020884180441498756, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012921273708343506, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010946080088615417, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010835534892976284, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.00997165497392416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009792161174118519, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006772505585104227, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006803782191127539, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006376628298312426, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004561682231724262, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09504994004964828, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09504994004964828, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07820837944746017, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.07262822240591049, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.07012888044118881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0631059855222702, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0363396480679512, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.03400667756795883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.042164552956819534, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0387968048453331, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.03697937726974487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.03235860913991928, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.030761225149035454, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02142231911420822, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.018540799617767334, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.017439693212509155, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.01717246137559414, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.010718921199440956, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.009010178968310356, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.008898316882550716, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.00821085274219513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.008042605593800545, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.005616395268589258, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0056168558076024055, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.005258829798549414, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.003731037490069866, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07820837944746017, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.07820837944746017, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2131573110818863, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1986602395772934, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.19374175369739532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1749483346939087, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09973818063735962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09443406760692596, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1133115291595459, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10353977978229523, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1014159768819809, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.089084193110466, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08463606983423233, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05767664313316345, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04945429787039757, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04771413654088974, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04729640856385231, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02880297601222992, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024287564679980278, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02414451725780964, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.022075073793530464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0218079574406147, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.014845801517367363, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.014351507648825645, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014126613736152649, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.008925805799663067, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1133115291595459, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1133115291595459, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2103385478258133, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18308132886886597, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.17363199591636658, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1458435207605362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09816157817840576, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08783099800348282, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11463842540979385, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10483351349830627, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10122735053300858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07906688004732132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07117323577404022, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.059228088706731796, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05120030418038368, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04819126054644585, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04747492074966431, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.029901329427957535, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02649623528122902, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026265010237693787, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02297678217291832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02253798022866249, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01685548759996891, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01850106008350849, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01595415361225605, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01462282333523035, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11463842540979385, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11463842540979385, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17730291187763214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16632875800132751, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16281196475028992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14805646240711212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08354263752698898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0796085000038147, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09327101707458496, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08604826033115387, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08476804941892624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07525407522916794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0716324895620346, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04769550636410713, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.041341159492731094, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04020216688513756, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03993038833141327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.023873841390013695, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.020985232666134834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020902981981635094, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019334714859724045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.019167879596352577, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012745853513479233, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013131393119692802, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01236956287175417, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00926029123365879, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09327101707458496, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09327101707458496, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23607151210308075, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22157609462738037, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21700991690158844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19736841320991516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.1110021248459816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10579684376716614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12405584007501602, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11421097815036774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11262497305870056, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10001794248819351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09516798704862595, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06311238557100296, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05459066852927208, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.0531291700899601, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.052787672728300095, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03155564144253731, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027127796784043312, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027027826756238937, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024863850325345993, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024639828130602837, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016486451029777527, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016071589663624763, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015984030440449715, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010222657583653927, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11421097815036774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11421097815036774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.8.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24584302306175232, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2176925241947174, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.20732611417770386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18343429267406464, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1133144199848175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.102305568754673, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.133724182844162, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12223011255264282, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11732131242752075, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09637512266635895, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09055577218532562, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06827885657548904, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.058937884867191315, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.0548819936811924, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05389760062098503, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.034444451332092285, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02919198013842106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.029005425050854683, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025941630825400352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025311393663287163, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018916722387075424, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019384758546948433, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017644383013248444, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.014163119718432426, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1133144199848175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1133144199848175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10639525204896927, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09879898279905319, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09570613503456116, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0862402468919754, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04959779977798462, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04654973745346069, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05700318515300751, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05243504047393799, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05045274272561073, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.044162679463624954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04194394871592522, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.029079994186758995, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02522987686097622, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02393338829278946, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.023617399856448174, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01460973359644413, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012617615051567554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012489337474107742, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011555950157344341, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011360645294189453, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007810468785464764, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00816927570849657, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007377798669040203, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005872497800737619, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10639525204896927, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10639525204896927, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09065394103527069, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08418123424053192, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08139173686504364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07340134680271149, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04221799224615097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.039564430713653564, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.048869844526052475, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04491466283798218, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04296797141432762, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.037627510726451874, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03579489141702652, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02487040124833584, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.021476658061146736, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02027643471956253, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.019991541281342506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012449454516172409, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010503256693482399, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010383263230323792, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.00958021730184555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009398146532475948, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0065314192324876785, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0065596136264503, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006128058303147554, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004411287605762482, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09065394103527069, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09065394103527069, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2104182243347168, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1961694359779358, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.19126541912555695, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.17280994355678558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0985914021730423, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09328410029411316, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11204099655151367, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10237456113100052, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10020479559898376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0880478098988533, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08356426656246185, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.057007256895303726, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04892433062195778, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04717382416129112, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04676178842782974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028484059497714043, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024057995527982712, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02390945702791214, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.021880576387047768, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.021615104749798775, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.014714276418089867, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.014295214787125587, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014002365060150623, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.008985750377178192, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11204099655151367, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11204099655151367, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21602898836135864, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18514351546764374, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.17274856567382812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.14096975326538086, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09948962926864624, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08697917312383652, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12209205329418182, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1110561192035675, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10356607288122177, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07899319380521774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07140081375837326, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06318461149930954, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0544905960559845, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.049326006323099136, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.048035964369773865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0320318304002285, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027780137956142426, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027333298698067665, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024173568934202194, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.023394126445055008, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.018318884074687958, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.02041354402899742, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016755815595388412, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01651267148554325, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1110561192035675, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1110561192035675, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17375700175762177, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1628628969192505, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15940460562705994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14480945467948914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08181577175855637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07787465304136276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09134892374277115, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.0843658596277237, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08304192125797272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07364879548549652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07002190500497818, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.046707477420568466, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04050551354885101, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03935813903808594, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0390833355486393, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02336079627275467, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.020515168085694313, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020436441525816917, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01888902857899666, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018720686435699463, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012408576905727386, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012814601883292198, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01202777586877346, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00898764282464981, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09134892374277115, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09134892374277115, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23577764630317688, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2211669236421585, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2165713757276535, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19680984318256378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11086391657590866, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10558287799358368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12388695776462555, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11414648592472076, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11250629276037216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09981152415275574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09495338797569275, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06305548548698425, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.0545622780919075, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05306536704301834, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.052702371031045914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.031494785100221634, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027071954682469368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02696959860622883, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024788236245512962, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.0245584137737751, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01640644669532776, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01601329818367958, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01589388959109783, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010136403143405914, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11414648592472076, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11414648592472076, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.9.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2484891563653946, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.221357524394989, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21130073070526123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18688322603702545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11484567821025848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10416528582572937, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13497257232666016, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12370291352272034, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11870346963405609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09819167852401733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09220735728740692, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0692722499370575, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.059760916978120804, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05570441484451294, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05472596362233162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03494656831026077, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029785126447677612, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02959294617176056, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026615098118782043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025993581861257553, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019293591380119324, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01995546743273735, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018008597195148468, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.014791754074394703, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11484567821025848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11484567821025848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11178772896528244, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1037355586886406, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1005774587392807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09052204340696335, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.052054453641176224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04891106113791466, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05969178304076195, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05495559424161911, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0529610700905323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04633165895938873, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04393046349287033, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030393868684768677, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026334045454859734, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.025021715089678764, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024700433015823364, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015222910791635513, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013016524724662304, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012891841121017933, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011875661090016365, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011681661009788513, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00802118144929409, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008181669749319553, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007571951020509005, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00562384445220232, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11178772896528244, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11178772896528244, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09073041379451752, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08421088755130768, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08133283257484436, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0732722207903862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04216897115111351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.039475925266742706, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.04888002946972847, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04499003291130066, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04292246326804161, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.03756958991289139, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03568840026855469, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.024827169254422188, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02150663733482361, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02024814672768116, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.019947512075304985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012426717206835747, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010487979277968407, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010354829952120781, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.009563183411955833, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.00936721358448267, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00652589974924922, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006561358459293842, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006113006267696619, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004392338916659355, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09073041379451752, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09073041379451752, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21279588341712952, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.19812428951263428, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.19287611544132233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.17399680614471436, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09958502650260925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09401144832372665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11312609165906906, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1039072722196579, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10122915357351303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0888257771730423, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08416002988815308, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05753614008426666, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.049658507108688354, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04764622449874878, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04717983677983284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028730979189276695, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024322861805558205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.024148477241396904, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.022108688950538635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02180096134543419, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.014847230166196823, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.014539315365254879, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014102785848081112, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00915580801665783, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11312609165906906, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11312609165906906, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23669594526290894, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.20649223029613495, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1948111355304718, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.16508814692497253, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10989180952310562, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09808576852083206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13113713264465332, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12038140743970871, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11408944427967072, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.090577132999897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0818563848733902, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06799650192260742, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.059105537831783295, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05428329482674599, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.053112614899873734, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03457082808017731, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.030384473502635956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.029996739700436592, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.026920286938548088, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026219679042696953, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.019809124991297722, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.02199581079185009, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.018456878140568733, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.017741098999977112, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11408944427967072, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11408944427967072, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17447854578495026, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16353842616081238, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15998490154743195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1451270431280136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08226963877677917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07828968018293381, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09201020002365112, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08492672443389893, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.0834849402308464, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07400127500295639, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07033565640449524, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04711800813674927, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04085564613342285, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.039664410054683685, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.039383843541145325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.0236037690192461, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.020833736285567284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020747333765029907, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019201243296265602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.019032025709748268, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012687727808952332, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013227006420493126, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012298185378313065, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009526006877422333, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09201020002365112, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09201020002365112, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23656943440437317, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22177596390247345, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21715153753757477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19711320102214813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11119382083415985, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10586488991975784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12440721690654755, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11455974727869034, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11287978291511536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09999299049377441, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09510527551174164, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06321115791797638, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05478040128946304, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05325240269303322, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05288808047771454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.031621064990758896, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027226373553276062, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02711261622607708, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024918515235185623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024680551141500473, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01652507111430168, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01619761250913143, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016005219891667366, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010363802313804626, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11455974727869034, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11455974727869034, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.10.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2560945749282837, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22821055352687836, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21817708015441895, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1925259828567505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11847803741693497, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.1077379360795021, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13797785341739655, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12700875103473663, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12239385396242142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10112934559583664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09453195333480835, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07077886164188385, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.061331167817115784, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.057426176965236664, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05648983269929886, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03562363237142563, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03065064549446106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030476972460746765, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.027329005300998688, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02673419564962387, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01953127421438694, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.02041695825755596, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018328377977013588, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.015094907023012638, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10112934559583664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10112934559583664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11292700469493866, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10494904220104218, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10167118906974792, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09155552834272385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.052641142159700394, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04942712187767029, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.060437675565481186, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0556652769446373, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05353090539574623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04687034338712692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04444510117173195, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030752340331673622, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026642726734280586, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0252549946308136, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024928344413638115, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015387347899377346, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013080599717795849, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012941251508891582, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01192201767116785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01171131432056427, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008075506426393986, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008146114647388458, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007605359889566898, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005474957637488842, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11292700469493866, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11292700469493866, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09389462321996689, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08730360865592957, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0843447893857956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07597994804382324, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04375358670949936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04098869487643242, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05064789205789566, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04669754207134247, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04449547454714775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.03899645060300827, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.037030793726444244, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.025725627318024635, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02231576293706894, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.020995544269680977, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02067255973815918, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012870044447481632, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010841225273907185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01070108637213707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.009881801903247833, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009678386151790619, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006730519235134125, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00673953490331769, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006299828179180622, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004450209904462099, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09389462321996689, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09389462321996689, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21438813209533691, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.19967083632946014, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1942458152770996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1752486377954483, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10035789012908936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09475859999656677, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11428709328174591, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10479795932769775, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10199175775051117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08950954675674438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08477863669395447, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05810140445828438, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05006108060479164, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04803527891635895, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04754626005887985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.029016412794589996, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024547401815652847, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.024368267506361008, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02230987884104252, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02199685573577881, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015000814571976662, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.014692380093038082, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014239685609936714, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009300723671913147, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11428709328174591, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11428709328174591, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22700758278369904, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.19887113571166992, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.18663236498832703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1617293655872345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10506254434585571, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09348931163549423, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12882721424102783, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11734036356210709, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1094922348856926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08801043033599854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08189808577299118, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06647755950689316, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0571129247546196, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05143944174051285, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05003634840250015, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03347860276699066, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02806270681321621, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027560077607631683, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02488146908581257, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024005871266126633, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01850869134068489, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.019735800102353096, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016716061159968376, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.015088685788214207, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1094922348856926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1094922348856926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.16930049657821655, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.15857194364070892, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15500348806381226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1405150592327118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07980407029390335, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07588405907154083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0893959030508995, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08247107267379761, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08100832998752594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07169454544782639, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.068143829703331, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04577159509062767, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03969192877411842, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.038494762033224106, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0382101908326149, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.022916201502084732, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.0202315766364336, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020146837458014488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01863233372569084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01846424862742424, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012300635688006878, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012869749218225479, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011906887404620647, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009285026229918003, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0893959030508995, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0893959030508995, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23184160888195038, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21712042391300201, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21241864562034607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19259347021579742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10892603546380997, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10365048795938492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12190179526805878, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11235520988702774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11059149354696274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0978507325053215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09282739460468292, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.061934299767017365, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.053693290799856186, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05216049775481224, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.051800768822431564, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030942369252443314, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026651091873645782, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026536600664258003, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02436228096485138, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024130022153258324, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01604141667485237, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01583147421479225, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015510661527514458, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01008246373385191, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11235520988702774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11235520988702774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.11.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25520816445350647, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22769790887832642, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21764546632766724, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1915133148431778, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1181880533695221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10746793448925018, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1377795785665512, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1266491562128067, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12201068550348282, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10070966184139252, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09406862407922745, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07077767699956894, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.061092309653759, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05716903135180473, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05623326078057289, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03567664697766304, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03028128109872341, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030107490718364716, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026894712820649147, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02629791758954525, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019602466374635696, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019896404817700386, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018406063318252563, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.014420092105865479, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10070966184139252, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10070966184139252, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11827576160430908, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1098535880446434, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10648025572299957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09581686556339264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05513047054409981, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.051787056028842926, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06320391595363617, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.058182187378406525, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05607951059937477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04908442869782448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04647507891058922, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.032154686748981476, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.027816513553261757, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.026432812213897705, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02610698528587818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01607932522892952, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0136203458532691, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013487933203577995, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012394539080560207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012182791717350483, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008402151986956596, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008376389741897583, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007930395193397999, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005515948869287968, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1098535880446434, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1098535880446434, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09770757704973221, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09085884690284729, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0877825915813446, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07905308157205582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04557379335165024, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04270123317837715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05264510214328766, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.048569485545158386, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04634427651762962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.040621742606163025, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03853686526417732, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.026750488206744194, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023209348320961, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02185276336967945, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021523011848330498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013371194712817669, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011261180974543095, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011121541261672974, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01026296429336071, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010057862848043442, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006984908599406481, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006968738976866007, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006552631501108408, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004575507249683142, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09770757704973221, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09770757704973221, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22470417618751526, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.20919255912303925, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2034822404384613, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1834218055009842, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.1053289845585823, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09932080656290054, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12000526487827301, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11009924113750458, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10710377246141434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09389526396989822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08885899186134338, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.061076320707798004, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.052693646401166916, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05045318976044655, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04990081116557121, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030547184869647026, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02584352344274521, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.025641679763793945, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.023502323776483536, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02315690368413925, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01587880216538906, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015590190887451172, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015063405968248844, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009991347789764404, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11009924113750458, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11009924113750458, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24017450213432312, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2139786034822464, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.20345300436019897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1741752177476883, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11240852624177933, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10156284272670746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13237647712230682, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12128271907567978, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11565712839365005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09346714615821838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0868164598941803, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06809190660715103, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.058972105383872986, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.055006448179483414, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.054051849991083145, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03437511622905731, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.030077680945396423, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02972433902323246, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02655765227973461, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025944264605641365, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01916508749127388, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.02091161161661148, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017962029203772545, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.016284167766571045, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11565712839365005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11565712839365005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1767471432685852, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1654706746339798, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16172954440116882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14664003252983093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0834324061870575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07930058240890503, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09350347518920898, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08624152094125748, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08471467345952988, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07492092251777649, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07120862603187561, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.047899406403303146, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.041542936116456985, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04028313234448433, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03998330980539322, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02400071732699871, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021246232092380524, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02115614525973797, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019577443599700928, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01939542219042778, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012922543101012707, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013611172325909138, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012506633065640926, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009914994239807129, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09350347518920898, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09350347518920898, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2377096265554428, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22261634469032288, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2178061455488205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19742891192436218, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.1118205115199089, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10635271668434143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1252385675907135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11535343527793884, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11353909224271774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10041619092226028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09527299553155899, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0637410581111908, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.055146973580121994, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05353327840566635, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05315212160348892, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03183772414922714, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027316683903336525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02719823271036148, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02495555207133293, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024709578603506088, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016527943313121796, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016170915216207504, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015961362048983574, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010212400928139687, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11535343527793884, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11535343527793884, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.12.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2643640339374542, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23531195521354675, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22508171200752258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1979474276304245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12247094511985779, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11121437698602676, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1426195204257965, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13084706664085388, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12648601830005646, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10406932234764099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09715983271598816, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07330349087715149, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06320898979902267, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.059285376220941544, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05834776908159256, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03705797716975212, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03150596842169762, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.03136669844388962, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.027964919805526733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.027367476373910904, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020552460104227066, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020809143781661987, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.019356144592165947, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.015238676220178604, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10406932234764099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10406932234764099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.123802550137043, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11500391364097595, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11144636571407318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10028393566608429, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05777621641755104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05426201596856117, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0662703737616539, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0609670951962471, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05875834450125694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05143122002482414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04871595278382301, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03372820094227791, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029210027307271957, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02776319719851017, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027419479563832283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016884302720427513, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014422394335269928, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014286388643085957, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013158043846487999, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012938974425196648, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008885216899216175, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009034130722284317, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00839734636247158, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006174449808895588, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11500391364097595, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11500391364097595, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10000727325677872, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09293030202388763, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08957920223474503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08060586452484131, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.046581752598285675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04354912042617798, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.054207801818847656, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.049983806908130646, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04739206284284592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04150751233100891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.039428725838661194, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.027527429163455963, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02388334460556507, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022359302267432213, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021989982575178146, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01377908419817686, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011539526283740997, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011376533657312393, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01052006147801876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01028500311076641, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007210835348814726, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007196090649813414, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006723267491906881, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00474767480045557, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10000727325677872, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10000727325677872, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.232999786734581, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.21694082021713257, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21110102534294128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.190281480550766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10925383120775223, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.1030859649181366, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12438207119703293, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11402028799057007, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11106349527835846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09737583994865417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09217328578233719, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06336014717817307, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05457420274615288, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0523262619972229, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.051773179322481155, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03167961537837982, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026796434074640274, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026596637442708015, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02436813712120056, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02402077428996563, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01644776202738285, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016154635697603226, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015613610856235027, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010366741567850113, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11402028799057007, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11402028799057007, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24463902413845062, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.21484288573265076, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.20474548637866974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1723344326019287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11426018923521042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.1030835509300232, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1334160566329956, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12179344892501831, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11784646660089493, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0936332419514656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08427943289279938, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06905399262905121, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05941860377788544, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05605112388730049, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05521886423230171, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.034996915608644485, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.030641615390777588, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.030413903295993805, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02681797184050083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026298200711607933, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.019721658900380135, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.021217914298176765, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.018744170665740967, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.016625957563519478, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11426018923521042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11426018923521042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1778421550989151, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16646340489387512, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16263994574546814, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14749738574028015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08401980251073837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07984831184148788, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09412761777639389, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08685117959976196, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08531886339187622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0754016861319542, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07162810117006302, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04823843017220497, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04184158891439438, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04056293144822121, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.040259309113025665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024181796237826347, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021355312317609787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02126624621450901, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019663194194436073, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.019481204450130463, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013024178333580494, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013643286190927029, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012613911181688309, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009882846847176552, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09412761777639389, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09412761777639389, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24201388657093048, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22666513919830322, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22176219522953033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2010200023651123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11390485614538193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10835712403059006, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12750275433063507, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11748553812503815, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11563722789287567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10223983228206635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09709424525499344, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06493129581212997, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.056170325726270676, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05455708131194115, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05417763069272041, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03245344012975693, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02786201424896717, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027744732797145844, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.0254503283649683, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02520286664366722, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016907958313822746, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01652989722788334, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016363387927412987, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010496139526367188, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11563722789287567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11563722789287567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.13.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2630031108856201, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23398704826831818, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22348414361476898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1969965547323227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12156778573989868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11034795641899109, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1427956372499466, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.130525603890419, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1257205605506897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10348037630319595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09677555412054062, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0729280561208725, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06281864643096924, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05877521634101868, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05779128521680832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.036639079451560974, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.031062263995409012, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030887555330991745, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02754257060587406, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.026916299015283585, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019891565665602684, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020325809717178345, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.0185820534825325, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.014600039459764957, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10348037630319595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10348037630319595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12829019129276276, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11934299021959305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11578705161809921, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10433897376060486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05990487337112427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05635109171271324, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06861458718776703, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06311824917793274, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.060903795063495636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0534152016043663, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.050649117678403854, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.034898094832897186, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.030188824981451035, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02872568555176258, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.028373979032039642, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017444344237446785, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01481249462813139, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014670156873762608, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013504222966730595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013279525563120842, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009109795093536377, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009121123701334, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00859924592077732, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006027642637491226, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11578705161809921, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11578705161809921, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1024695485830307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09540273249149323, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09207212179899216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08301335573196411, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04779009148478508, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.044745106250047684, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05538218840956688, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.051128238439559937, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04859248548746109, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.042652398347854614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04051632061600685, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.028101159259676933, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.024421347305178642, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022924717515707016, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02255314588546753, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014049941673874855, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01180726382881403, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011643152683973312, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01076542492955923, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010531803593039513, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0073268054984509945, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007307309657335281, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006849103607237339, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0047652581706643105, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1024695485830307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1024695485830307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2360934466123581, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22018522024154663, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21412178874015808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19328884780406952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11086704581975937, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.1045779287815094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12671531736850739, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11615811288356781, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11265847831964493, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09908809512853622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09395817667245865, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.064527228474617, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0556783601641655, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.053135428577661514, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05252879858016968, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03224939480423927, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027180340141057968, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026940137147903442, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02476140297949314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024377914145588875, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016768136993050575, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016385840252041817, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015871644020080566, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010432744398713112, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11615811288356781, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11615811288356781, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24406343698501587, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22183066606521606, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21439318358898163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1881958246231079, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11463041603565216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.1062086746096611, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13121497631072998, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11995875090360641, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11710158735513687, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09817605465650558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09107987582683563, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06783748418092728, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05842454731464386, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05600903928279877, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05543095991015434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03445451334118843, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.03038952685892582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.03022819571197033, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02729758433997631, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02692718617618084, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.019580865278840065, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.020634694024920464, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01885279268026352, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.016023335978388786, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11463041603565216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11463041603565216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1808006912469864, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1692347377538681, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16542689502239227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14998500049114227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08535628765821457, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08112490922212601, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09560216218233109, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08826734870672226, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08667843788862228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07659467309713364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07269874960184097, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04895053431391716, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04244401305913925, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04113618656992912, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04082683473825455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024483157321810722, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021544720977544785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02144906483590603, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019813761115074158, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01962069422006607, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013067610561847687, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01361098326742649, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012634542770683765, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009690317325294018, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09560216218233109, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09560216218233109, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24231472611427307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2269299477338791, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22196899354457855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20124468207359314, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11410573124885559, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1085115447640419, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12766236066818237, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11770597100257874, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11585545539855957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10246718674898148, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0971376970410347, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06495894491672516, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.056277256458997726, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.054643549025058746, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05424892157316208, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03243551403284073, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027887243777513504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027768157422542572, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025473585352301598, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.025226380676031113, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01681739091873169, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016526352614164352, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016244303435087204, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010460291057825089, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11585545539855957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11585545539855957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.14.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2640755772590637, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23476329445838928, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22418546676635742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19741898775100708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12194476276636124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11058865487575531, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14279213547706604, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13099345564842224, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1260903924703598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10366925597190857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.0969509482383728, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0729844868183136, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06297212839126587, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05883896350860596, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.057834137231111526, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.036737311631441116, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.030897829681634903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.03071935474872589, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02731584757566452, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02666959911584854, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019900690764188766, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019977498799562454, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018587253987789154, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.01406469102948904, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10366925597190857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10366925597190857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12185056507587433, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11345972865819931, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11008258908987045, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0993037074804306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05699857324361801, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.053642719984054565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06536141782999039, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06009325385093689, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05793006718158722, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.050876908004283905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04832832142710686, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03326006606221199, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028764499351382256, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.027350300922989845, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027011489495635033, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016642801463603973, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014101547189056873, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013965701684355736, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012866240926086903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012654300779104233, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008701704442501068, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008682415820658207, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008224458433687687, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005738493520766497, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11345972865819931, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11345972865819931, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10008548200130463, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.0931554064154625, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08994519710540771, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08114413172006607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.046724312007427216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.043755028396844864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05410635843873024, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04995249956846237, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.047512542456388474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04172950237989426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03960375860333443, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02747000753879547, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023860100656747818, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022419212386012077, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.022071830928325653, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0137318791821599, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01155763678252697, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011397968046367168, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010540044866502285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010316538624465466, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0071740224957466125, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0071618580259382725, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0067160241305828094, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004688933491706848, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10008548200130463, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10008548200130463, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24282914400100708, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22669163346290588, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22074520587921143, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19946467876434326, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11423488706350327, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10791280120611191, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13006606698036194, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1192803606390953, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11606081575155258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10213833302259445, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09688103944063187, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06630705296993256, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05712860822677612, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05475051701068878, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.054178234189748764, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03313887491822243, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02801024354994297, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027784859761595726, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025522226467728615, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02515263669192791, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01720368303358555, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016849813982844353, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016325606033205986, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010733337141573429, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11606081575155258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11606081575155258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23320305347442627, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.20863431692123413, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.19728884100914001, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.173225536942482, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10843166708946228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0979289710521698, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1323859542608261, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12080308794975281, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1125723198056221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09361538290977478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08698660880327225, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06816533207893372, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.058500632643699646, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.052847471088171005, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05144154652953148, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.034246526658535004, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028373045846819878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02783523127436638, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025538582354784012, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024666033685207367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01851794682443142, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.019434304907917976, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016676217317581177, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014336911961436272, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1125723198056221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1125723198056221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1846008449792862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.17270025610923767, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16882124543190002, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.15298856794834137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08722547441720963, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08285483717918396, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09766422212123871, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09016703069210052, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08857765048742294, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07828296720981598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07428253442049026, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0500003807246685, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04341895878314972, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04208613559603691, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.041769735515117645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02503676526248455, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.022111432626843452, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02202046662569046, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.020344149321317673, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.020149560645222664, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01340674702078104, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01405377872288227, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012975189834833145, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010109790600836277, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09766422212123871, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09766422212123871, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2473127394914627, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23149359226226807, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22649039328098297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2053723782300949, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11654505133628845, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1108231469988823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13049069046974182, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12022074311971664, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11831484735012054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10459043830633163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09923793375492096, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0663575530052185, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.057494156062603, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05582353472709656, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05542105808854103, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03318527340888977, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02850436046719551, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.0283806249499321, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.026039760559797287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.025779908522963524, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017284326255321503, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016914213076233864, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01670595444738865, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010738968849182129, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11654505133628845, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11654505133628845, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.15.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.26476338505744934, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23482149839401245, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2236187756061554, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1968013197183609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12223604321479797, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11048280447721481, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1437518447637558, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13200518488883972, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12663650512695312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10376087576150894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.097099669277668, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07365282624959946, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06349436938762665, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05903693288564682, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.057955384254455566, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.037085533142089844, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.031075548380613327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030851252377033234, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02745397388935089, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.026739591732621193, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020100122317671776, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020223163068294525, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018672168254852295, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.014273183420300484, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10376087576150894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10376087576150894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12999434769153595, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12059103697538376, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11654913425445557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10479994863271713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.060912102460861206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05698127672076225, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07036379724740982, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06475429981946945, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06198801100254059, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.054057396948337555, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05126487836241722, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.035814959555864334, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.031043613329529762, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.029293958097696304, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.028873519971966743, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017919158563017845, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.015204491093754768, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.015031883493065834, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01383052859455347, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013565757311880589, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009435290470719337, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009542057290673256, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008855856023728848, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0064690690487623215, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11654913425445557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11654913425445557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1109028309583664, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1028837189078331, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09907989203929901, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08917704969644547, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0519639290869236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.048446811735630035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06052794307470322, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05582531914114952, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05288480222225189, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04616984352469444, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04386179894208908, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030776573345065117, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026724105700850487, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.024987710639834404, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024564500898122787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015403504483401775, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012959359213709831, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012771411798894405, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011792430654168129, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011522953398525715, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0080878846347332, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008158705197274685, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007536786608397961, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005468649789690971, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1109028309583664, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1109028309583664, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2538781762123108, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23640921711921692, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23039068281650543, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.2078113704919815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11948402971029282, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11289171874523163, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1360154151916504, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12408944219350815, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12142156064510345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10633143782615662, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10092730075120926, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06927536427974701, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.059305332601070404, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05719270929694176, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.056691527366638184, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03458581864833832, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.029123660176992416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028950024396181107, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.026386503130197525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02606724575161934, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0178350992500782, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017226502299308777, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01692168228328228, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010722662322223186, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10633143782615662, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10633143782615662, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12120886892080307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11402104049921036, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11171063780784607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10184966772794724, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.056986451148986816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.054445281624794006, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0642094686627388, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05878344923257828, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0578128844499588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.051670804619789124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.049621812999248505, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.033372972160577774, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02885334938764572, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.028068333864212036, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02787703648209572, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017109541222453117, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.015527669340372086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.015475653111934662, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.014549415558576584, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014429439790546894, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.010064596310257912, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.010830903425812721, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.009832635521888733, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.008777263574302197, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11402104049921036, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11402104049921036, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17833365499973297, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16684909164905548, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16312025487422943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1478414088487625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08432158082723618, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0801042839884758, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09442226588726044, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08700959384441376, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08559433370828629, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07556001096963882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07179255038499832, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04839276894927025, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04186447337269783, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.040648095309734344, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04035816714167595, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02423408254981041, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021290015429258347, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.021209942176938057, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019552841782569885, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.019372692331671715, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013033300638198853, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013436334207654, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012639814056456089, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00958266668021679, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09442226588726044, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09442226588726044, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.25267350673675537, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2364928275346756, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.23134292662143707, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20964060723781586, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11913046985864639, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1132836788892746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.133395254611969, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12275657802820206, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.12092554569244385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10680446028709412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10141730308532715, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06801825761795044, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05874146893620491, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05710127204656601, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05671655759215355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03402143344283104, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02921459451317787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.029099183157086372, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.026674136519432068, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02642548456788063, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017871756106615067, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.017412297427654266, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.017316170036792755, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011185447685420513, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10680446028709412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10680446028709412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.16.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2485891431570053, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22575172781944275, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21760694682598114, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1936269849538803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11569903045892715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10702289640903473, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1336624175310135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12257377058267593, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11874014884233475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10078185051679611, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09452639520168304, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06833400577306747, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.059074677526950836, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.055919356644153595, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05515899136662483, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03442049399018288, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029548311606049538, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.029405124485492706, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026643669232726097, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02616802416741848, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018774088472127914, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019195055589079857, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017766928300261497, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013864262960851192, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11569903045892715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11569903045892715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11661072820425034, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10828632116317749, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10468976199626923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09429723769426346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05472118407487869, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05126935988664627, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06314631551504135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05805010721087456, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.055662062019109726, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04868222773075104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04618007689714432, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03224143013358116, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.027977092191576958, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02644950896501541, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02607879228889942, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016183679923415184, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013970835134387016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013820555992424488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012791465036571026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012567651458084583, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008657877333462238, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00909090880304575, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00814644806087017, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006568513810634613, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11661072820425034, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11661072820425034, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10386094450950623, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09650788456201553, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09311828762292862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08390776067972183, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.048675645142793655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04550657421350479, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.056594882160425186, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.052078209817409515, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04952815920114517, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04333920776844025, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04117920622229576, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.028802890330553055, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.024938000366091728, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02340901829302311, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.023034099489450455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014425358735024929, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012132514268159866, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011972634121775627, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01105907466262579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010825754143297672, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007596936076879501, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007620598189532757, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007092304527759552, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005126680247485638, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10386094450950623, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10386094450950623, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2410421371459961, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22464051842689514, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21900047361850739, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1977250576019287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11347226798534393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10728228092193604, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12907305359840393, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11787473410367966, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11528828740119934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10113897174596786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09600087255239487, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06576794385910034, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05635727196931839, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05433352291584015, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05385208502411842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032834336161613464, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027727026492357254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027557093650102615, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02518226020038128, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02486937679350376, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016993213444948196, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01649991236627102, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016142072156071663, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010394951328635216, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11528828740119934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11528828740119934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1950978934764862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.17287608981132507, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16438718140125275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1396680623292923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09127454459667206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08238481730222702, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10719236731529236, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09767502546310425, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09385967254638672, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07541364431381226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0686756819486618, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.055298663675785065, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04789144918322563, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.045014336705207825, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.044324006885290146, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02809145301580429, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.025051895529031754, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02483450062572956, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02220708131790161, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.021776992827653885, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016138773411512375, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01787647232413292, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015276861377060413, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014426504261791706, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10719236731529236, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10719236731529236, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.18282578885555267, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.17134897410869598, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1675632894039154, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.15190410614013672, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08653818815946579, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08229154348373413, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09688255190849304, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08928399533033371, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08780500292778015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07768096029758453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07373601198196411, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04962914437055588, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04293053597211838, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04169052466750145, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.041390225291252136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024879159405827522, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021792883053421974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02170596458017826, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02003227360546589, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01985161378979683, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013382956385612488, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013696134090423584, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012976547703146935, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009702264331281185, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09688255190849304, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09688255190849304, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.25226160883903503, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23646049201488495, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.23154133558273315, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2099774330854416, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11907365918159485, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1133098378777504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13313019275665283, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12259645760059357, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.12078133225440979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10691457986831665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10147649794816971, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06777466088533401, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.0586533322930336, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05703975260257721, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.056652165949344635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.033881016075611115, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.029154732823371887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02903882786631584, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02664732187986374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.026404298841953278, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017700541764497757, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.017318831756711006, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01714659109711647, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011066182516515255, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10691457986831665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10691457986831665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.17.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25904199481010437, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23361949622631073, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2243022471666336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19913262128829956, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12046843022108078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11059284955263138, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14034484326839447, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12860514223575592, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12406732141971588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10429956018924713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09792090207338333, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07204366475343704, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06215563043951988, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05844135209918022, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05753801763057709, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03632717579603195, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03125258907675743, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.031071951612830162, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.028160927817225456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02760012447834015, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020085327327251434, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020845264196395874, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018895244225859642, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.015515889041125774, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10429956018924713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10429956018924713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11256485432386398, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10472327470779419, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10129230469465256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0912242978811264, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05269496515393257, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.049418553709983826, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06075455993413925, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05597659572958946, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05358893796801567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04692878574132919, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04452233016490936, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03099069930613041, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02685338258743286, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02535834349691868, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02500324137508869, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015508891083300114, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013218640349805355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013065817765891552, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012074134312570095, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011848239228129387, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008199550211429596, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008367410860955715, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007699720561504364, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005768061615526676, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11256485432386398, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11256485432386398, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09644760191440582, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08973906934261322, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0863877534866333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07782761007547379, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.045064859092235565, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04206676036119461, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05262700468301773, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04848221689462662, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04580312594771385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0401587076485157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03813550993800163, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02671903371810913, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023198116570711136, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0216553695499897, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021287087351083755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013377164490520954, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011234911158680916, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011064521037042141, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01025484874844551, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010017878375947475, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007048855535686016, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007089752238243818, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006557326298207045, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004761462565511465, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09644760191440582, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09644760191440582, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23339496552944183, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2178763896226883, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21198178827762604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19126586616039276, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10976048558950424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10357588529586792, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.125123992562294, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11482515186071396, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1114456057548523, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09792957454919815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09279691427946091, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06368700414896011, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05492234602570534, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.052565209567546844, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05200742557644844, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03179825842380524, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026879901066422462, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026656510308384895, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024454474449157715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024096976965665817, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016475027427077293, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016155268996953964, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015595565550029278, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010247448459267616, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11482515186071396, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11482515186071396, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22534745931625366, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1962144672870636, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.18577514588832855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15471111238002777, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10429885238409042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09339148551225662, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12323718518018723, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11299996823072433, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10848326981067657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08493395149707794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07604587823152542, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06383759528398514, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.055400241166353226, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.051434505730867386, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05048426613211632, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03245646506547928, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028667667880654335, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028403909876942635, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025178274139761925, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024588173255324364, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01856483891606331, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.02054121531546116, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017408575862646103, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01652633026242256, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11299996823072433, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11299996823072433, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17975491285324097, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16846492886543274, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16474649310112, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.149525448679924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08501540869474411, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08085554838180542, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09513357281684875, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08777674287557602, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08625011146068573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07638014107942581, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07254970073699951, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0487065464258194, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.0422382652759552, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04099433124065399, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0407014861702919, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02439168468117714, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02153731696307659, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02144967019557953, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019837981089949608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.019656753167510033, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013108225539326668, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013680491596460342, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012701454572379589, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009854678064584732, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09513357281684875, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09513357281684875, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24591578543186188, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23056717216968536, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2256414145231247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20482756197452545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11587606370449066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11031243950128555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12950871884822845, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11933024972677231, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11757125705480576, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10415799915790558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09889620542526245, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06586600095033646, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.057054553180933, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05547651648521423, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05510276183485985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03290260583162308, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02832149900496006, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.028210055083036423, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025898156687617302, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.025658853352069855, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017144937068223953, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016774272546172142, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016598861664533615, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010635677725076675, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11587606370449066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11587606370449066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.18.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2618522047996521, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2349979430437088, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2251310795545578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19935421645641327, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12157649546861649, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11107917129993439, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14144262671470642, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1300995796918869, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12534572184085846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10456204414367676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09800326824188232, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07259685546159744, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06280482560396194, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.058939605951309204, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.058000072836875916, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.036651670932769775, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03147295489907265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.03128467872738838, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02821420319378376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02762088179588318, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020341554656624794, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020956598222255707, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01917290687561035, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.015512891113758087, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10456204414367676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10456204414367676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10780450701713562, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10038428753614426, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09696336835622787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08739811927080154, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05040529742836952, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.047224681824445724, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05831645801663399, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.053752582520246506, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05123063549399376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.044931281358003616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0426672026515007, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.029696030542254448, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02572178654372692, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.024204198271036148, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02384519763290882, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014851556159555912, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012554241344332695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012393003329634666, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011456655338406563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011225022375583649, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007793233264237642, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007875227369368076, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007280536461621523, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005301926285028458, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10780450701713562, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10780450701713562, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0927080437541008, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08631247282028198, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08305894583463669, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0748743861913681, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.043284960091114044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.040398694574832916, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.050606466829776764, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04673446714878082, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04398815706372261, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.03866134583950043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03673509508371353, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.025687027722597122, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.022339319810271263, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.020792445167899132, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.020413393154740334, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012852419167757034, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010762907564640045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01059026550501585, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.00982822198420763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.00959153100848198, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006751024164259434, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006767012178897858, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006264704745262861, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004496827721595764, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0927080437541008, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0927080437541008, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22666792571544647, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2114250212907791, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2056267112493515, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.18573635816574097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10647860169410706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10045915842056274, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12181863933801651, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11149778962135315, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10814908891916275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09509080648422241, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0902378112077713, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.061952847987413406, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05333777517080307, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.051026977598667145, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05046583339571953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030964713543653488, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026135796681046486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.025908008217811584, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02380192279815674, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.023442678153514862, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016077304258942604, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015764860436320305, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015187890268862247, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010087350383400917, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11149778962135315, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11149778962135315, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2156694382429123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18684121966362, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.17242968082427979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.14835497736930847, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09965106099843979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08577993512153625, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12542346119880676, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11468285322189331, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10411466658115387, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08244862407445908, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0770435780286789, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06505176424980164, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05585484579205513, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.048986054956912994, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04726562276482582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032762281596660614, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02702840231359005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026347123086452484, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02387121133506298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.022812264040112495, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.018179774284362793, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.019532622769474983, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016026927158236504, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.015083109959959984, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11468285322189331, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11468285322189331, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1773328334093094, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1663159281015396, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16262881457805634, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14757463335990906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08379115164279938, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07967358082532883, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09384387731552124, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08659636974334717, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08498689532279968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07535401731729507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0716405138373375, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04807663336396217, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04169248417019844, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04042648524045944, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04012971371412277, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024088125675916672, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021282296627759933, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.0211903378367424, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019630128517746925, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.0194498673081398, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.0129938879981637, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013590159825980663, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012586776167154312, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009849653579294682, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09384387731552124, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09384387731552124, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2384173572063446, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2237405925989151, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2189585417509079, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19876956939697266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11228464543819427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10691580176353455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12558725476264954, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11571014672517776, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11389529705047607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.1010141372680664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09589853137731552, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06382304430007935, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05531185492873192, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.0537661574780941, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05339536443352699, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.031877871602773666, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027494104579091072, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027378879487514496, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.0251713078469038, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02494014985859394, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016553156077861786, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016353169456124306, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016011560335755348, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01046762429177761, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11571014672517776, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11571014672517776, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.19.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.26379403471946716, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2362118363380432, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22582891583442688, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19942176342010498, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12250043451786041, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11162334680557251, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14325681328773499, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13159656524658203, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12648765742778778, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10497109591960907, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09818918257951736, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07370711117982864, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.0635518953204155, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.059377335011959076, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05837910994887352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03722762316465378, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03166251629590988, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.03145795315504074, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.028300099074840546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.027658775448799133, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020677898079156876, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.021064622327685356, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.019391560927033424, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.015506978146731853, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10497109591960907, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10497109591960907, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11156021058559418, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10392248630523682, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10057766735553741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09068357199430466, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05208379775285721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04891499876976013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05998799577355385, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05527758598327637, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05293797329068184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.046478744596242905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04411252215504646, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03050270676612854, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026450185105204582, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.025003798305988312, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024643883109092712, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015263230539858341, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012902351096272469, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012753453105688095, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011770548298954964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01155044510960579, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007978666573762894, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007994215935468674, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007485266774892807, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005291687324643135, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11156021058559418, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11156021058559418, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09442179650068283, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08800485730171204, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08479662239551544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07652339339256287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04403811693191528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04117394611239433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.051252543926239014, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04735565558075905, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.044743768870830536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.03936539590358734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03741278499364853, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02600201964378357, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02263842336833477, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.021128268912434578, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02076425775885582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012993726879358292, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010908262804150581, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010740051046013832, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.00996437855064869, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009732485748827457, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0068060955964028835, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0068077039904892445, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006336194463074207, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004475626163184643, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09442179650068283, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09442179650068283, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2265310138463974, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.21148443222045898, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.20563970506191254, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.18577170372009277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10635204613208771, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10031300783157349, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12144885212182999, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1114211454987526, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1079879105091095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09502734243869781, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09009107202291489, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06186036020517349, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.053390681743621826, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05096788331866264, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05037963017821312, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030931487679481506, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026137731969356537, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.025907568633556366, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.023827267810702324, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.023451467975974083, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01609848439693451, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015844514593482018, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015217679552733898, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010198977775871754, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1114211454987526, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1114211454987526, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2244541496038437, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1960371732711792, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.18624509871006012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15996207296848297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10402680188417435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09353359788656235, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12262436747550964, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11199900507926941, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1079908087849617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08568021655082703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07946186512708664, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06320691108703613, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05459415912628174, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05096806585788727, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05008003115653992, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031930167227983475, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027988357469439507, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027743622660636902, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024632683023810387, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024092797189950943, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01802157238125801, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.019589196890592575, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016929134726524353, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.015408200211822987, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11199900507926941, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11199900507926941, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17723815143108368, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1663118302822113, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16263550519943237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1476336419582367, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08368711918592453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07965099066495895, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09369011968374252, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.0864543691277504, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08487677574157715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07532382011413574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07164221256971359, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04794178530573845, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.041624926030635834, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04038777947425842, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.040083713829517365, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024029452353715897, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02127993479371071, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.021185394376516342, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019640324637293816, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.019457954913377762, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012935400009155273, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013603695668280125, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01252732053399086, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00988758634775877, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09369011968374252, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09369011968374252, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23684409260749817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22226674854755402, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21760310232639313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19758400321006775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.1114661768078804, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10617879778146744, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12466943264007568, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11484319716691971, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11304797232151031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10031472891569138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09531314671039581, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06344354897737503, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05488153174519539, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05335799604654312, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.052992142736911774, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.031685031950473785, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02723051607608795, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027117755264043808, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024928122758865356, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024695226922631264, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01648993045091629, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016114186495542526, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015945905819535255, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010203437879681587, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11484319716691971, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11484319716691971, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.20.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2683311402797699, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2396254539489746, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2290578931570053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.2018454521894455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12454269081354141, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11327257752418518, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14570027589797974, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13365322351455688, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12860891222953796, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10631304234266281, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09952777624130249, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07482931762933731, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06458328664302826, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.060405902564525604, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.059399306774139404, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.037764210253953934, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03227898105978966, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.03209279477596283, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.028796466067433357, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02816499024629593, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020932059735059738, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.021550282835960388, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.019670333713293076, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.015990186482667923, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10631304234266281, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10631304234266281, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11868234723806381, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11051368713378906, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10703671723604202, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09663169085979462, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.055403418838977814, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.052080996334552765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0636843666434288, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.058698199689388275, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.056339770555496216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04950724542140961, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04698335379362106, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.032432179898023605, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028146106749773026, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.026638967916369438, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.026278803125023842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016229892149567604, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013871625065803528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013724825344979763, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012698711827397346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012473382987082005, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008562841452658176, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008757677860558033, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008063382469117641, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006028642877936363, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11051368713378906, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11051368713378906, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09671612083911896, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09009795635938644, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08664356172084808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07821591198444366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04502758011221886, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04203248396515846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05266236141324043, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0486992709338665, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.045775678008794785, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.040277037769556046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03831671550869942, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02672072872519493, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023268166929483414, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02162126637995243, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021222636103630066, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013374949805438519, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011191161349415779, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011005042120814323, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010236073285341263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.00998177845031023, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007024234626442194, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00704082939773798, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006516304798424244, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004669753834605217, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09671612083911896, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09671612083911896, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23015055060386658, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.21484237909317017, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.20916877686977386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.18911713361740112, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10797806829214096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10202528536319733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12296824902296066, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11278631538152695, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10967373102903366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09661965072154999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09168106317520142, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06266256421804428, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05402304604649544, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05172201991081238, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.051173582673072815, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031323857605457306, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026512321084737778, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026294633746147156, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024188432842493057, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.023827461525797844, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01627352647483349, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01601957529783249, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015404188074171543, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010297228582203388, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11278631538152695, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11278631538152695, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22811754047870636, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2015581578016281, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.19220981001853943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15888424217700958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.1064380556344986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09632471203804016, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12464528530836105, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11367840319871902, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10990815609693527, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0871194526553154, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07732872664928436, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0646296888589859, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05543549358844757, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05214860662817955, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.051361799240112305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03269408270716667, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02841280959546566, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02819005958735943, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02479710429906845, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024303460493683815, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.018517596647143364, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.019556112587451935, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017563896253705025, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01521189883351326, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11367840319871902, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11367840319871902, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17282924056053162, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1620769500732422, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15856094658374786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14398352801799774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08159798383712769, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07768097519874573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09118005633354187, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08424476534128189, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08277308195829391, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07342502474784851, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06979256123304367, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.046690165996551514, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04056084156036377, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03935791552066803, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0390767902135849, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02338043786585331, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02071167714893818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02063089981675148, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01911141537129879, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018942881375551224, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012561383657157421, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013211498036980629, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01217492762953043, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009568346664309502, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09118005633354187, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09118005633354187, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23505878448486328, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22062772512435913, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21607926487922668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19632206857204437, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11061463505029678, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10540130734443665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12347541749477386, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11387896537780762, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11222173273563385, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09960214048624039, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09462124854326248, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06284259259700775, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05440208315849304, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05292799323797226, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05257365107536316, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03136308491230011, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026998130604624748, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026888318359851837, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024712443351745605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02448466792702675, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016285490244627, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01594523712992668, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015770133584737778, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010060797445476055, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11387896537780762, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11387896537780762, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.21.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.26043030619621277, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23210707306861877, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22114916145801544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1947953850030899, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12054795771837234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.1092592403292656, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14274044334888458, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13037371635437012, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1246030405163765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10285177081823349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09632151573896408, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0729309618473053, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06287012249231339, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05841914564371109, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05734704062342644, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03670553117990494, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03113796003162861, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.03089963085949421, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02772696129977703, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.027043480426073074, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020104004070162773, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020752381533384323, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01868763566017151, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.015216195024549961, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10285177081823349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10285177081823349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12300881743431091, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11476995050907135, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11135338991880417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10064000636339188, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0573999285697937, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05408317968249321, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06577495485544205, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06056174635887146, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.058309756219387054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.051403459161520004, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.048821862787008286, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.033442746847867966, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028972968459129333, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.027533186599612236, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02719217911362648, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016722004860639572, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014224459417164326, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014082711189985275, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013015137054026127, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012797628529369831, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008750076405704021, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00879924651235342, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008254416286945343, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005869716871529818, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11476995050907135, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11476995050907135, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09940417855978012, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09275725483894348, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08947579562664032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08086050301790237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04628133773803711, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.043338630348443985, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05376976355910301, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.049697935581207275, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04700768366456032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.041466422379016876, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03943956643342972, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.027239473536610603, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023722004145383835, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022182432934641838, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.0218082033097744, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013612331822514534, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01142072957009077, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011247220449149609, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010449965484440327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010210447013378143, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00710582360625267, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007084701210260391, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006623860448598862, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004606118891388178, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09940417855978012, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09940417855978012, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22889839112758636, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.21404486894607544, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.20826755464076996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1885911226272583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.1074446439743042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10150415450334549, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12274147570133209, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11258548498153687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10905613750219345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09636440873146057, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09151116013526917, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06246368587017059, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05396438017487526, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05148269608616829, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05088158696889877, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031245790421962738, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026373283937573433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02612997032701969, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024107173085212708, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.023723475635051727, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016292620450258255, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015946760773658752, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015414969995617867, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010210939683020115, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11258548498153687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11258548498153687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22597230970859528, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.20519164204597473, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1984996199607849, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.17461781203746796, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10554524511098862, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09766835719347, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12101481109857559, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11055313050746918, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10798405110836029, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09040859341621399, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08437222987413406, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06239812821149826, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.053825441747903824, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0515533983707428, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05102640390396118, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031622570008039474, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02796941250562668, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02779783494770527, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02510964497923851, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024764303117990494, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017891982570290565, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.018976256251335144, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017226791009306908, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014733009971678257, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11055313050746918, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11055313050746918, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17045053839683533, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.15995098650455475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15648958086967468, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1422109305858612, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0804063007235527, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07653924822807312, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08980954438447952, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08300255239009857, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08156707882881165, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07237327843904495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06884194165468216, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04592186585068703, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.039892300963401794, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.038734085857868195, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.038460392504930496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.022978447377681732, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02028895728290081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02020631730556488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.018704233691096306, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018535243347287178, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012273634783923626, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012808157131075859, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011894263327121735, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009133915416896343, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08980954438447952, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08980954438447952, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.231953427195549, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21780917048454285, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21328185498714447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19388702511787415, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10914389789104462, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10398691147565842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1217714250087738, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11234498769044876, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11071692407131195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09830360114574432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09343333542346954, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06190695986151695, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05368797108530998, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.052232276648283005, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.051883354783058167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030915671959519386, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026634275913238525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026530727744102478, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02440163679420948, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024181200191378593, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016023969277739525, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015735620632767677, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015516621060669422, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009930850937962532, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11234498769044876, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11234498769044876, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.22.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2607116401195526, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23157218098640442, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22059698402881622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19461973011493683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12036006897687912, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10887303203344345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1419709324836731, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.130003422498703, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12460079789161682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10242614150047302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09607149660587311, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07263211160898209, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06253600120544434, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.058142196387052536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.0570763498544693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.036672573536634445, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.030672626569867134, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030445097014307976, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.027169039472937584, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02647731453180313, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.02014177292585373, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020047776401042938, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01876750774681568, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.014258398674428463, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10242614150047302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10242614150047302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11872251331806183, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11090385913848877, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1076270341873169, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09735409915447235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.055451538413763046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.052332669496536255, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06354468315839767, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05852006748318672, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05633189156651497, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04971582442522049, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04728265106678009, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03228731080889702, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.027974827215075493, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02659793198108673, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.026260821148753166, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016157492995262146, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013710134662687778, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013569669798016548, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012543822638690472, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012334452010691166, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008449256420135498, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008432379923760891, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007983734831213951, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00555779505521059, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11090385913848877, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11090385913848877, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09889694303274155, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09233809262514114, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0891631618142128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08057942986488342, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04607426002621651, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.043221425265073776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.053359225392341614, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04935019090771675, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04679859057068825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04129617288708687, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03928333520889282, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.027051303535699844, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023565776646137238, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022093864157795906, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021739957854151726, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013525877147912979, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011398275382816792, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01123356819152832, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010430777445435524, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010206041857600212, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0070731318555772305, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007078929804265499, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00661518843844533, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004644500091671944, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09889694303274155, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09889694303274155, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23544611036777496, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22041699290275574, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21475715935230255, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19462326169013977, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11057224869728088, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10474143922328949, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1256972849369049, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11530002951622009, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11225425451993942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09921479225158691, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09433066844940186, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06400405615568161, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0552082285284996, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.052960216999053955, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.052415408194065094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03198713809251785, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02709290385246277, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026876982301473618, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024769596755504608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024416252970695496, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016626661643385887, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01628250628709793, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015785396099090576, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010382472537457943, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11530002951622009, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11530002951622009, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22341780364513397, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.20141616463661194, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1925274133682251, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1732654571533203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10459499061107635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09534560143947601, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12352412939071655, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11294758319854736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10762487351894379, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09083946794271469, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08586103469133377, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06358154118061066, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05460004508495331, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05077792704105377, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.049844272434711456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03194315731525421, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026964319869875908, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02661665715277195, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024267885833978653, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0236853938549757, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017238648608326912, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017868155613541603, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015978453680872917, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.012995542027056217, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11294758319854736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11294758319854736, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17054004967212677, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16006676852703094, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1565863937139511, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14221905171871185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08048084378242493, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07664132863283157, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08987338840961456, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.0830070897936821, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08165250718593597, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07245561480522156, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06892994791269302, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04596878960728645, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.039932429790496826, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03878839313983917, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03852416202425957, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.023027023300528526, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02037404477596283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020300693809986115, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.018801698461174965, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018635636195540428, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012362550012767315, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012937918305397034, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011995784938335419, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00931184459477663, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08987338840961456, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08987338840961456, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23575140535831451, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22121655941009521, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2166295200586319, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19688688218593597, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11087075620889664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10573694109916687, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12379711866378784, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11409787088632584, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11247392743825912, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09987387806177139, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09493474662303925, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0628969818353653, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.054522834718227386, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05306842550635338, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05271606892347336, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.031426236033439636, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02708224020898342, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026975834742188454, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024814317002892494, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02458447590470314, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016347432509064674, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01601940207183361, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01584215834736824, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010150393471121788, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11409787088632584, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11409787088632584, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.23.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25668033957481384, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22734442353248596, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21561725437641144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19004051387310028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11841502785682678, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10649994015693665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14109641313552856, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12923285365104675, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12288407981395721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10056136548519135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09446973353624344, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07232987135648727, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.062259092926979065, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05732571333646774, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05611007660627365, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03659103438258171, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.030399007722735405, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.03011762537062168, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026940373703837395, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.026160873472690582, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.02018115483224392, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020172107964754105, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018633602187037468, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.014507354237139225, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10056136548519135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10056136548519135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12556545436382294, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11731037497520447, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11395090818405151, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10313427448272705, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05869397521018982, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05544930696487427, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06698581576347351, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06167653203010559, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0595993846654892, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05261316895484924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.050060562789440155, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0340828001499176, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029532739892601967, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02818075381219387, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027862627059221268, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017067862674593925, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014602167531847954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01447177305817604, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013379896059632301, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013178378343582153, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008973855525255203, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00907217338681221, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008524412289261818, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006145530845969915, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11395090818405151, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11395090818405151, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10334215313196182, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09650847315788269, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09328846633434296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08435995876789093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.048161063343286514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.045253317803144455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.055555377155542374, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05137210339307785, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0489167720079422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04317391291260719, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.041028376668691635, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.028151802718639374, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0245230570435524, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02309384010732174, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.022745320573449135, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014080682769417763, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011905986815690994, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011752655729651451, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010900458320975304, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010678227990865707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007370782550424337, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007376382127404213, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006926660891622305, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004849820397794247, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10334215313196182, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10334215313196182, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24178528785705566, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22636908292770386, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22083646059036255, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20038799941539764, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11364776641130447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.1077449843287468, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12873704731464386, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11818091571331024, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11535853892564774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10203284025192261, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09703963994979858, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06551036983728409, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.056549374014139175, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05440289154648781, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.053881753236055374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03270445019006729, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027769586071372032, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027574047446250916, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025386730208992958, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025048337876796722, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016933610662817955, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016569392755627632, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016131674870848656, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010461573489010334, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11535853892564774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11535853892564774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2264527827501297, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.20606672763824463, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.19956731796264648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1737758368253708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10552551597356796, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09782715141773224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12109287828207016, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1099785715341568, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10774335265159607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0897926390171051, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08243019133806229, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06228533014655113, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.053682249039411545, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0516585037112236, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05116450786590576, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031679436564445496, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028203684836626053, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028056656941771507, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025247927755117416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02496999315917492, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017991917207837105, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.019309982657432556, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017410682514309883, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.015206458978354931, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1099785715341568, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1099785715341568, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.16787904500961304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1575206071138382, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1541401892900467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14004088938236237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0792141854763031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07543130964040756, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08844851702451706, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08167382329702377, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08036375045776367, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07131299376487732, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06786946207284927, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04520263522863388, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03926932066679001, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.0381564199924469, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03789698705077171, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02263191156089306, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.019984839484095573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.019902758300304413, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.018420999869704247, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01825781911611557, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012080608867108822, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01261070929467678, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011724961921572685, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008995368145406246, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08844851702451706, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08844851702451706, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2322264164686203, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21802273392677307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21356673538684845, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1941481977701187, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10929165035486221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10416524112224579, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12187714129686356, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11242112517356873, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11083544045686722, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09843605756759644, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09365028142929077, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.061973754316568375, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05371343344449997, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.052287451922893524, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05194457620382309, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030986681580543518, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026682822033762932, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02657768875360489, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02444327436387539, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02422761172056198, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01613575406372547, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015774166211485863, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01565772481262684, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00998256541788578, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11242112517356873, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11242112517356873, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.24.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2479783594608307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22116149961948395, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2110348790884018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18661615252494812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11436565965414047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10381855070590973, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13422493636608124, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1232362687587738, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11826686561107635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.0978478193283081, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09181172400712967, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06875257194042206, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05924905091524124, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05518481135368347, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05418333411216736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.034616872668266296, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029027391225099564, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02881753072142601, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025787629187107086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02514820173382759, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018823087215423584, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01884925365447998, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017503630369901657, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013302446343004704, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11436565965414047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11436565965414047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12103240191936493, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11316385120153427, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10990452021360397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09949423372745514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05655151605606079, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0533750094473362, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06467144936323166, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.059613484889268875, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05742467939853668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05078384652733803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04838784039020538, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03290813788771629, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028494300320744514, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02711758017539978, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.0267773550003767, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016457268968224525, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013971099629998207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013833802193403244, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01280570961534977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012597015127539635, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00859684869647026, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00858602300286293, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008138835430145264, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0056524258106946945, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11316385120153427, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11316385120153427, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0999259352684021, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09335813671350479, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09012730419635773, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08154776692390442, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04656486213207245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04372388869524002, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.053923048079013824, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.049852196127176285, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04729717597365379, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04178382456302643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03979852423071861, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02733965590596199, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02381150610744953, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022335592657327652, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02197919227182865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0136800492182374, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011542128399014473, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01138084102421999, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010577056556940079, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01034966204315424, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007178445812314749, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007197731640189886, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0067227729596197605, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004764024168252945, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0999259352684021, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.0999259352684021, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23739372193813324, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2225217968225479, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2171328067779541, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19705407321453094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11152113974094391, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10587088018655777, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12615175545215607, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1158253401517868, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11318271607160568, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10021738708019257, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09535212069749832, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06419792771339417, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05536177009344101, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.053383029997348785, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.052915364503860474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032051101326942444, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027244046330451965, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02706623636186123, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02491494081914425, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024604501202702522, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016575060784816742, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01622677966952324, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01581338420510292, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010238789021968842, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1158253401517868, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1158253401517868, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22494076192378998, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2021712064743042, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.19485150277614594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1612163782119751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10549706220626831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0971449762582779, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12089153379201889, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11069348454475403, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10806912928819656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08721015602350235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07703575491905212, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0623769648373127, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05376093462109566, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.051407426595687866, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.050833214074373245, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03139173984527588, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027671607211232185, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02751106768846512, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024210529401898384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.023853272199630737, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017403865233063698, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.018546754494309425, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016683027148246765, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014149542897939682, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11069348454475403, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11069348454475403, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1738593876361847, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1631411463022232, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15966854989528656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14522004127502441, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08201076835393906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07811470329761505, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09147282689809799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08449044078588486, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08319731056690216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07384109497070312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07026255130767822, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.046719927340745926, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04057510197162628, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.039450984448194504, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0391833521425724, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02341090701520443, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.020580559968948364, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020502835512161255, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.018950071185827255, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018790436908602715, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012508252635598183, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01284845732152462, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01214856281876564, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009021344594657421, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09147282689809799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09147282689809799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2362748384475708, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22183836996555328, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2173798531293869, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1976855993270874, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11114451289176941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10600319504737854, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12410498410463333, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11425364017486572, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11272794753313065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.1001562848687172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09529398381710052, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06300429254770279, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05461917072534561, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05319061875343323, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05285259336233139, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.0315568782389164, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027170995250344276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027070682495832443, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024906761944293976, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024690113961696625, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01656222902238369, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01609472930431366, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016090741381049156, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010254443623125553, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11425364017486572, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11425364017486572, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.25.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25122949481010437, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22344037890434265, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2128700613975525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18822865188121796, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11573683470487595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.1047302857041359, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13596084713935852, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12515856325626373, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11981472373008728, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09888788312673569, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09281378239393234, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06955668330192566, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06028951704502106, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05593162029981613, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.054849207401275635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03495750576257706, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029551735147833824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02932433784008026, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026277517899870872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02559695951640606, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01871742680668831, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01939620077610016, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017284708097577095, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013857437297701836, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11573683470487595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11573683470487595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12116823345422745, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11346641927957535, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11024803668260574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10001564770936966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05672156810760498, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.053574226796627045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06474802643060684, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05967371538281441, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05755425989627838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05099291726946831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04863467067480087, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0329497829079628, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028560705482959747, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02719571441411972, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02686362713575363, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01648661307990551, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014027685858309269, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013891199603676796, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012876546010375023, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012667940929532051, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008640366606414318, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008636741898953915, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008199218660593033, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005731787532567978, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11346641927957535, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11346641927957535, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10244227200746536, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09585254639387131, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09276619553565979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08404242992401123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.047820594161748886, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04500090330839157, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05506549030542374, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.050903499126434326, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.048538416624069214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.042995840311050415, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.040939170867204666, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.027945933863520622, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0243079774081707, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022910576313734055, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.022579176351428032, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013972777873277664, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011785656213760376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01163693517446518, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010804661549627781, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010592645965516567, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007292258553206921, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007254897151142359, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00685902452096343, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004718636628240347, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10244227200746536, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10244227200746536, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24002684652805328, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22513824701309204, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22001053392887115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1998203843832016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11282302439212799, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10720965266227722, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12720994651317596, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11683592945337296, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11445635557174683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10147792100906372, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09656305611133575, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06471587717533112, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05581916496157646, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05396047234535217, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.0535111278295517, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032288894057273865, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027446143329143524, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02728305757045746, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025097152218222618, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024811210110783577, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016622668132185936, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01616792194545269, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015895355492830276, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00999983586370945, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11683592945337296, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11683592945337296, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23648107051849365, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.21235540509223938, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.20474423468112946, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.17226605117321014, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11129264533519745, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10246729105710983, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1266615092754364, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11614180356264114, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11383861303329468, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09224512428045273, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.08258554339408875, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06553246080875397, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05647428333759308, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05431750416755676, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.053800493478775024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03314574062824249, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.029342401772737503, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02921079285442829, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025731580331921577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02540682628750801, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01867791824042797, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.019749222323298454, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017995210364460945, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.015248113311827183, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11614180356264114, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11614180356264114, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17744016647338867, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16659387946128845, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16306345164775848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1483653336763382, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0836399644613266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07971785217523575, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09312495589256287, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08612591028213501, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.0848616361618042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07539096474647522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07172656059265137, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04755352437496185, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.0413176566362381, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.040201082825660706, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.039931729435920715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.023766793310642242, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02089890092611313, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020821578800678253, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019245179370045662, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01908233016729355, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.0125685790553689, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012956356629729271, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012202249839901924, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008988585323095322, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09312495589256287, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09312495589256287, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23945772647857666, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22496327757835388, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2204284369945526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20061782002449036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11268299072980881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10749199986457825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12538239359855652, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1158500537276268, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11427497863769531, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10161037743091583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09664712101221085, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06376571953296661, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05533936992287636, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05391021817922592, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05356960743665695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03182901442050934, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02748114801943302, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027384890243411064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025190256536006927, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02497933804988861, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01647350564599037, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016211669892072678, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01598760485649109, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.0102155152708292, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1158500537276268, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1158500537276268, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.26.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2512302100658417, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22337734699249268, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21246322989463806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18805348873138428, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11564233899116516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10446734726428986, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1369142085313797, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12574805319309235, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11980867385864258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09892179071903229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09298636019229889, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06983257085084915, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06036417558789253, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05577181652188301, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05464938282966614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03497879207134247, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029263490810990334, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.029002979397773743, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025972675532102585, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025240087881684303, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018691129982471466, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018983928486704826, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017196662724018097, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013207820244133472, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11564233899116516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11564233899116516, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11805503815412521, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11042030900716782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10717649757862091, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09716199338436127, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05517236143350601, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05207935720682144, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06324823945760727, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05825068801641464, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05601401627063751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.049574509263038635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04729177802801132, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03219690918922424, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02784951776266098, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.026482049375772476, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02614687569439411, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016125265508890152, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013704526238143444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013564439490437508, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012582135386765003, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01237449236214161, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008491339161992073, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008512350730597973, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008039851672947407, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005715020000934601, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11042030900716782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11042030900716782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10066473484039307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.0941227599978447, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09090685099363327, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08229522407054901, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04695393517613411, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0440777987241745, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.054404277354478836, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05027896165847778, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04768013209104538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04216714948415756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.040182553231716156, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02758190408349037, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02401793748140335, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0225117988884449, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02215380221605301, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013794178143143654, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011609600856900215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011444121599197388, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010644348338246346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010417112149298191, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007225466426461935, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0072097391821444035, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006764542777091265, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0047323014587163925, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10066473484039307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10066473484039307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23834244906902313, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22340688109397888, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.218158558011055, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19812431931495667, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11202780157327652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10636764019727707, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12648671865463257, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11624626815319061, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11365882307291031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10073123127222061, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09583346545696259, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0643705427646637, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.055510517209768295, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05358247086405754, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.053119756281375885, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032109469175338745, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02729794755578041, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027131488546729088, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02497553825378418, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02467866986989975, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016547100618481636, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0161734689027071, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015795396640896797, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010107574053108692, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11624626815319061, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11624626815319061, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2218860685825348, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1936790645122528, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.18455685675144196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15737023949623108, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10331954061985016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0925121009349823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12016672641038895, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10951955616474152, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10654902458190918, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08437500894069672, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07689737528562546, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06215536221861839, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05321202054619789, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.050500836223363876, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04985394701361656, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031569212675094604, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02740245684981346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027217255905270576, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.023864207789301872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02345208451151848, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017985301092267036, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.018696056678891182, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01716252975165844, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014479556120932102, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10951955616474152, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10951955616474152, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1830662190914154, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.17194591462612152, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16836939752101898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.15321876108646393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08627407997846603, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08224792033433914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09624552726745605, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08876803517341614, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08749812096357346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07776396721601486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0740995928645134, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0490838997066021, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.042625900357961655, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04149843007326126, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04123438149690628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024610938504338264, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021648133173584938, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.021569736301898956, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019952163100242615, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01978987082839012, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013182826340198517, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013512012548744678, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01282153557986021, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009492352604866028, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09624552726745605, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09624552726745605, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23989541828632355, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22547024488449097, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22095896303653717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20108748972415924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11291296035051346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10771587491035461, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12570036947727203, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11602174490690231, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11446461826562881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.1018315926194191, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.096902035176754, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06395646184682846, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.055463384836912155, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.054046422243118286, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.053709082305431366, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03199874609708786, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027604490518569946, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.0275108702480793, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025326818227767944, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02511376515030861, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016777023673057556, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016354339197278023, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016306478530168533, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010434162802994251, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11602174490690231, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11602174490690231, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.27.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25043901801109314, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22154287993907928, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2098875641822815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1857651323080063, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11511103063821793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10338422656059265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13669073581695557, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12590448558330536, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1194232627749443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09802189469337463, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.092130646109581, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06986350566148758, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.0603666752576828, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05549349635839462, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.0542876198887825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.034978967159986496, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029038704931735992, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.028755320236086845, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025666631758213043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024886123836040497, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018663618713617325, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018800048157572746, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017096713185310364, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012933408841490746, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11511103063821793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11511103063821793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11923912167549133, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11155320703983307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10826338827610016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09816436469554901, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0557444803416729, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.052585896104574203, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06403296440839767, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.058912646025419235, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.056599780917167664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05012314021587372, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04785197228193283, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03258633241057396, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028164852410554886, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02672124095261097, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.026373885571956635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016297804191708565, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01374880876392126, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013602820225059986, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012606657110154629, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012389782816171646, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008540146052837372, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008433325216174126, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008072509430348873, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0055131022818386555, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11155320703983307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11155320703983307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10200892388820648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09536479413509369, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09212368726730347, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08346337080001831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04762521758675575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04469849541783333, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05528697371482849, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.051058750599622726, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04834233596920967, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.042784906923770905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04080357402563095, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.028043441474437714, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.024378012865781784, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022845197468996048, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02247404307126999, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01403298880904913, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011778336018323898, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011608178727328777, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010798521339893341, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010563937947154045, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007356864865869284, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007316357456147671, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006885482929646969, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004798092879354954, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10200892388820648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10200892388820648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2367485761642456, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22195929288864136, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21660222113132477, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19668516516685486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11124417185783386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10554149001836777, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1260613650083542, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11566512286663055, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11283214390277863, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10000424832105637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09534558653831482, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06414421647787094, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.055257219821214676, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05323241651058197, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.052732035517692566, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032008346170186996, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027136731892824173, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026959557086229324, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024819398298859596, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024510527029633522, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016517814248800278, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016125312075018883, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015746289864182472, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010108535178005695, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11566512286663055, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11566512286663055, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.20647253096103668, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1848103106021881, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.17621655762195587, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15136410295963287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09731560945510864, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08830176293849945, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11397568881511688, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10423325002193451, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09954497963190079, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08108734339475632, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07415123283863068, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05907045304775238, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.050649337470531464, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04755360260605812, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04680170491337776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03000723384320736, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.025817746296525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.025537079200148582, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02278953790664673, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.022315872833132744, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017064282670617104, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017719682306051254, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016116289421916008, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.013660048134624958, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11397568881511688, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11397568881511688, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.16910292208194733, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1587499976158142, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15545953810214996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14154110848903656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07973061501979828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07599887251853943, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08888636529445648, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08204102516174316, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08084510266780853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07187643647193909, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06852681189775467, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04542427882552147, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.039366066455841064, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03831048309803009, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03806304186582565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02272513136267662, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.019909152761101723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.019836777821183205, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.018339022994041443, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01818132773041725, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012031810358166695, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012326076626777649, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01169200986623764, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008536921814084053, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08888636529445648, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08888636529445648, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2364160716533661, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2221444994211197, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21771493554115295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19836854934692383, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11125433444976807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1061476469039917, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1241706907749176, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1143384650349617, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11280851066112518, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.1003737822175026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0957212895154953, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06312885880470276, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05467986688017845, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.053281232714653015, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.052942972630262375, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.031561896204948425, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02726055309176445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027162423357367516, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025022942572832108, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024813447147607803, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016435813158750534, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01621284894645214, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015967851504683495, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01042520347982645, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1143384650349617, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1143384650349617, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.28.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2511174976825714, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22480057179927826, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21454742550849915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1911991536617279, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1159810796380043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10545129328966141, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13695824146270752, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12547650933265686, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11994059383869171, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10014399886131287, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09456460922956467, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0699637308716774, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06034679710865021, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.056029658764600754, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05497399717569351, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.035147860646247864, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029565945267677307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02932669222354889, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026475155726075172, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025800062343478203, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01895446889102459, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019340755417943, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017547301948070526, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013749262318015099, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1159810796380043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1159810796380043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11222465336322784, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10499447584152222, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10193818807601929, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09242656826972961, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0524970106780529, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.049526751041412354, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06030169501900673, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05545005947351456, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05328578129410744, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04717768356204033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04509086161851883, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030703261494636536, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02650994434952736, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02517094276845455, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024851705878973007, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015376807190477848, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012982707470655441, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012843070551753044, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011912220157682896, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011708654463291168, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008093642070889473, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008000526577234268, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007662574294954538, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005285355262458324, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11222465336322784, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11222465336322784, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09589777886867523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08972489088773727, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08645636588335037, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07832015305757523, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04470326378941536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04188985750079155, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05217388644814491, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04819052293896675, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04538752883672714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04020943492650986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03837921470403671, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.026457924395799637, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02300933003425598, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.021458497270941734, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021082358434796333, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013257564976811409, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011103648692369461, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010929292999207973, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010197436437010765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009962145239114761, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00698605552315712, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006961877457797527, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006516630295664072, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00461408169940114, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09589777886867523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09589777886867523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24212783575057983, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22705703973770142, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22186769545078278, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20148591697216034, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11377376317977905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10814162343740463, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12862296402454376, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11788322031497955, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11544184386730194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.1023954302072525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09759354591369629, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0654846727848053, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05633116886019707, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05442699044942856, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05397733300924301, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032661519944667816, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027687452733516693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027530226856470108, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025326278060674667, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025040024891495705, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016818039119243622, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01633412577211857, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016071049496531487, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010134111158549786, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11544184386730194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11544184386730194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1971164494752884, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1634238362312317, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15238454937934875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.13382793962955475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09019492566585541, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07753822952508926, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10663396865129471, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09772562235593796, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09436018019914627, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07131524384021759, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06679989397525787, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05512676388025284, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04776579141616821, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04440684616565704, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04358971118927002, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.027950696647167206, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024631375446915627, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.024436186999082565, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.021243449300527573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.020743919536471367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015964454039931297, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01755054108798504, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014985566027462482, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014044946059584618, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10663396865129471, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10663396865129471, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.16506044566631317, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1551167070865631, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15193170309066772, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1383408159017563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07774364203214645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07416358590126038, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08664403110742569, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07997532933950424, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.07884696125984192, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0701449066400528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06685908138751984, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.044188495725393295, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.038317643105983734, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.037307024002075195, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03707470744848251, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.022089185193181038, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.019298233091831207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.0192293468862772, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.017768103629350662, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01761677861213684, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.011615865863859653, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.011813591234385967, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011285804212093353, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00802589301019907, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08664403110742569, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08664403110742569, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23005014657974243, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21636544167995453, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21210849285125732, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19316570460796356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10820899903774261, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10330922901630402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12052549421787262, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11116191744804382, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10970570892095566, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09768522530794144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09306755661964417, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06130676344037056, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05311049893498421, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05176163092255592, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0514400489628315, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030585329979658127, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02636784128844738, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026273857802152634, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02419080212712288, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.023983042687177658, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01581714302301407, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01550319790840149, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01536724902689457, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009727228432893753, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11116191744804382, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11116191744804382, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.29.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24138911068439484, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2156471610069275, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.20532695949077606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1835731565952301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11119730770587921, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10080860555171967, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13200756907463074, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12098318338394165, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1151244044303894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09609243273735046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09107275307178497, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06744826585054398, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.058096304535865784, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05366437882184982, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.052570492029190063, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.033885207027196884, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.028205178678035736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.027946598827838898, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025239121168851852, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024554509669542313, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018301932141184807, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018365807831287384, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016875453293323517, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012884252704679966, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1151244044303894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1151244044303894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11304477602243423, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10599398612976074, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10311642289161682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0937037393450737, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.052953220903873444, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05010533332824707, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.060407157987356186, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05571809783577919, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.053729794919490814, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04773402214050293, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04554654657840729, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030711296945810318, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02664678357541561, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.025380190461874008, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.025080397725105286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015362212434411049, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013024328276515007, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012895754538476467, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011956487782299519, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011765155009925365, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008025218732655048, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007923013530671597, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007616565562784672, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00512648094445467, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11304477602243423, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11304477602243423, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10097580403089523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09468921273946762, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09176409989595413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08340223878622055, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04727394878864288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04461655765771866, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05446675419807434, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05015261843800545, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04797150567173958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.042598262429237366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04072568938136101, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.027660353109240532, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023960217833518982, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022662052884697914, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.022354772314429283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013828783296048641, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011643457226455212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011508985422551632, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010695839300751686, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010497826151549816, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007228217553347349, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007130790967494249, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006819292902946472, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004618130624294281, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10097580403089523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10097580403089523, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2429220825433731, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2282998114824295, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2232915759086609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.2031753808259964, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11440213024616241, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10887056589126587, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12886638939380646, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11821343004703522, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11601750552654266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10314781963825226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09841170161962509, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06556767225265503, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.056493256241083145, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05470915511250496, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05429821461439133, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03271850571036339, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0278323981910944, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02768896520137787, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025499189272522926, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025226788595318794, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01684618927538395, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01639152690768242, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016167273744940758, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010179594159126282, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11601750552654266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11601750552654266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2056204080581665, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.16929277777671814, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15494051575660706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12240090221166611, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0937672033905983, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07870671898126602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11559122055768967, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1058744341135025, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09903381764888763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07111285626888275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06441648304462433, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05970658361911774, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05135160684585571, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04581816866993904, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04443918541073799, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030042793601751328, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02493673749268055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.024507753551006317, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020772762596607208, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019870681688189507, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01659136824309826, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01754613220691681, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014868895523250103, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01330060139298439, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11559122055768967, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11559122055768967, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17588947713375092, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16532233357429504, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16197866201400757, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.147552028298378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0828835517168045, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07909130305051804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09235482662916183, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08522825688123703, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08403462171554565, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07481315732002258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07128174602985382, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04711325466632843, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.040822334587574005, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.039761897176504135, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03951364383101463, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.023544305935502052, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.020553847774863243, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.020479632541537285, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.018918629735708237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018760886043310165, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012386304326355457, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012552299536764622, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012033374048769474, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008504902943968773, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09235482662916183, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09235482662916183, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2368476539850235, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2227340042591095, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21834011375904083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19904322922229767, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11141163855791092, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10636971890926361, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12431733310222626, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.114413782954216, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11294227093458176, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10061478614807129, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0959962010383606, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0632336437702179, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05468428507447243, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05331026017665863, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05298631638288498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03163507580757141, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027216920629143715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027128148823976517, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024991612881422043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024780111387372017, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01653640903532505, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016103483736515045, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016087761148810387, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010246108286082745, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.114413782954216, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.114413782954216, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.30.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24741379916667938, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22097301483154297, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2099994719028473, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18759874999523163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1140529215335846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10320553928613663, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1360790878534317, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12469013035297394, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11818505823612213, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09851934760808945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09324590116739273, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06929341703653336, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.059924159198999405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05510135740041733, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.053911756724119186, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03478478640317917, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029012558981776237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.028707383200526237, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02595680020749569, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025193259119987488, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01858411356806755, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018987948074936867, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017002293840050697, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013325931504368782, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1140529215335846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1140529215335846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11309946328401566, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10587181895971298, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10274425148963928, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0932527408003807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.052883777767419815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04990976303815842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.060618478804826736, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05593269690871239, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.053685057908296585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04757624492049217, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.045395124703645706, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030827689915895462, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026740437373518944, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.025353869423270226, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02502266690135002, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015421218238770962, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013035203330218792, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012888820841908455, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011948960833251476, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011736959218978882, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008062806911766529, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007975843735039234, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007611492183059454, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005180459003895521, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11309946328401566, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11309946328401566, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09755467623472214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09130949527025223, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0882347971200943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08008123934268951, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.045636165887117386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04285566136240959, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.052991997450590134, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04885450750589371, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.046333685517311096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04104113206267357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03924426808953285, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02689507231116295, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023339087143540382, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02188127115368843, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021532392129302025, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013453621417284012, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011259377002716064, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011097981594502926, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01032964326441288, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010106153786182404, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0070350137539207935, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006954214535653591, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006581368390470743, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004514489788562059, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09755467623472214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09755467623472214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24152371287345886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22660282254219055, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22142519056797028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.2012854516506195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.1136564239859581, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10804302990436554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12834885716438293, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1176876574754715, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11534898728132248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10229939967393875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09759044647216797, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06534220278263092, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05623546615242958, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05436902493238449, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05392569303512573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03260715305805206, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027669530361890793, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027520857751369476, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025313837453722954, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025033608078956604, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016774678602814674, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016312897205352783, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016052667051553726, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01014493964612484, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11534898728132248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11534898728132248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.19587041437625885, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1685875654220581, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1601957082748413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1323215365409851, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09146685898303986, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08130885660648346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10485109686851501, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09622485935688019, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09406489878892899, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07243731617927551, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06305847316980362, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.054001081734895706, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.046484023332595825, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04431619495153427, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04379882663488388, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02710442803800106, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.023499347269535065, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023374788463115692, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.019847242161631584, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019498828798532486, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.014742037281394005, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015358570031821728, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014057177118957043, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.011281616054475307, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10485109686851501, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10485109686851501, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1845911741256714, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.17349544167518616, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.16997529566287994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.15483398735523224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08701448887586594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08300664275884628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09687549620866776, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08943070471286774, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08822862803936005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07851412892341614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07483638823032379, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.049418456852436066, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.042835645377635956, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04173571988940239, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04148270934820175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024700898677110672, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021557413041591644, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.021481353789567947, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.019833911210298538, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.019663479179143906, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012981116771697998, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013129853643476963, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01261885929852724, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008857243694365025, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09687549620866776, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09687549620866776, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2395511269569397, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22531472146511078, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22083772718906403, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20120443403720856, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11274860054254532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10765665769577026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12552958726882935, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11578798294067383, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1143137663602829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10178668051958084, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09698458760976791, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06385787576436996, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05531378835439682, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.053929418325424194, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05360235273838043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03188817575573921, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02745847962796688, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02736693061888218, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025188598781824112, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02498011663556099, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016523556783795357, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01613123156130314, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016062311828136444, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010109125636518002, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11578798294067383, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11578798294067383, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.31.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2511494755744934, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2240007370710373, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21285301446914673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19017373025417328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1157238706946373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.1046728640794754, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13789185881614685, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12656822800636292, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12003383785486221, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10000136494636536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09453223645687103, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0703190267086029, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06079665943980217, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05585016682744026, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05463123694062233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03536444902420044, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02931860275566578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.029012460261583328, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026221556589007378, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02543654851615429, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019078420475125313, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019090890884399414, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017514776438474655, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013259420171380043, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1157238706946373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1157238706946373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.32.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12754672765731812, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1194472461938858, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11605651676654816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10547137260437012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0599055215716362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05655444785952568, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06885843724012375, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0632091611623764, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06081271916627884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05394390970468521, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05165504664182663, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03514416143298149, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03026827797293663, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.028757227584719658, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02839634194970131, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017585523426532745, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014814869500696659, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014666832983493805, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013615889474749565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013380838558077812, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009192211553454399, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009118541143834591, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008696804754436016, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006003107875585556, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11605651676654816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11605651676654816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.32.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10512179881334305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09844053536653519, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09510409086942673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0863475352525711, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04927898198366165, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04625725373625755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.057378578931093216, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05283287912607193, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.050037071108818054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.044376716017723083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04242981970310211, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.029159659519791603, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02529311180114746, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.023663286119699478, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02327149733901024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01459246315062046, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012240203097462654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012057337909936905, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01124996691942215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010999535210430622, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007665685378015041, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007654855027794838, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007171806879341602, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005084378179162741, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10512179881334305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10512179881334305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.32.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23453499376773834, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22007110714912415, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2149234265089035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19551090896129608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11040135473012924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10486925393342972, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12544110417366028, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11468939483165741, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1120392307639122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09955227375030518, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09501786530017853, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06392528861761093, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05488743633031845, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05287731811404228, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05239453166723251, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03192828223109245, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02700779028236866, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026825807988643646, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024763891473412514, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02445829100906849, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016504891216754913, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016107024624943733, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01571568287909031, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01018636953085661, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11468939483165741, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11468939483165741, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.32.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.14330831170082092, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.13155972957611084, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.12771430611610413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1105075255036354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06813634186983109, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.06377425789833069, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07727524638175964, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.07049597054719925, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06936270743608475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05846383050084114, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05337173864245415, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.039969347417354584, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0342409685254097, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03316463157534599, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.03291860967874527, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02007676474750042, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.017735879868268967, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01766861416399479, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.015876824036240578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.015721961855888367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.011044037528336048, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.011699722148478031, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.010709249414503574, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.008866537362337112, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1105075255036354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1105075255036354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.32.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1535862237215042, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.14405596256256104, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.140936017036438, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.12820987403392792, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0723976120352745, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.06893958896398544, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0810965970158577, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07464205473661423, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.07345671206712723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06518713384866714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06214344501495361, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.041464194655418396, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03586432710289955, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03484458103775978, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03460593894124031, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.020792104303836823, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.01817493699491024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.018102062866091728, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.016738776117563248, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.016587167978286743, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.011141262017190456, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.011363244615495205, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.010813570581376553, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.007982500828802586, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0810965970158577, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0810965970158577, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.32.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.22566241025924683, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2118576318025589, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2074892371892929, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.18877896666526794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10620411485433578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10121160745620728, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1189381554722786, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10930368304252625, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10770638287067413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09563986957073212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09118060022592545, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06050074100494385, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05226225033402443, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05087012052536011, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.050538320094347, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030316052958369255, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026017533615231514, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.025919975712895393, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.023841852322220802, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.023632854223251343, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015947779640555382, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01547697652131319, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015487608499825, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009936808608472347, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10930368304252625, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10930368304252625, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.32.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.231423482298851, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.20584997534751892, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.1947924792766571, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1740194857120514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.10665608197450638, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09577885270118713, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.12786781787872314, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11747536063194275, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.110639289021492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09189338237047195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08716725558042526, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06565061956644058, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05679455026984215, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.051809538155794144, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05055100843310356, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03308837115764618, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02774796262383461, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.027407841756939888, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02492666430771351, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024157464504241943, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.017864739522337914, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018831096589565277, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016228266060352325, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013867721892893314, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.110639289021492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.110639289021492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.33.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11773554235696793, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11046602576971054, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10740473121404648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09763038903474808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.055319663137197495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05232299119234085, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06339474767446518, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.058212652802467346, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05614608898758888, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04988560080528259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04775698482990265, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0323009230196476, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02788594737648964, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.026554696261882782, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02623184770345688, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016176767647266388, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013702921569347382, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013571169227361679, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012604189105331898, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01240473985671997, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008493868634104729, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008454895578324795, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008065447211265564, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005617186892777681, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11046602576971054, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11046602576971054, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.33.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1000090092420578, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09373835474252701, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09078111499547958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08252784609794617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04688052088022232, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04417317360639572, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.054208144545555115, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04990924149751663, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.047565728425979614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.042262569069862366, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04043734073638916, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02754782885313034, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02386784367263317, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02249225042760372, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02216688171029091, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013782374560832977, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011589206755161285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011439396068453789, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010654166340827942, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010445300489664078, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007215764373540878, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007158326450735331, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0067917839623987675, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004692370537668467, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1000090092420578, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1000090092420578, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.33.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23272791504859924, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2185615599155426, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2136019915342331, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19433854520320892, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10953696072101593, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10416021943092346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12387663871049881, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11339417845010757, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11110007762908936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09871624410152435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.094305120408535, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0630231499671936, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05416935309767723, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05238548293709755, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.051953695714473724, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031467363238334656, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02663537859916687, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0264878049492836, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02439943142235279, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0241253525018692, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016181614249944687, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015696287155151367, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015473236329853535, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009721346199512482, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11339417845010757, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11339417845010757, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.33.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.19251593947410583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.17394448816776276, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16829928755760193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.14318571984767914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09045076370239258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08372066169977188, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10305994749069214, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09427538514137268, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09255796670913696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07593651860952377, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06915799528360367, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0532761812210083, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04577658325433731, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04401069134473801, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04360037297010422, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.026940152049064636, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02360508218407631, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02350020781159401, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020842233672738075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.020573601126670837, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015112902037799358, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015714731067419052, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01457824744284153, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.011924229562282562, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10305994749069214, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10305994749069214, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.33.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.15201729536056519, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.14273418486118317, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.13978202641010284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.12721040844917297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07166841626167297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0682988315820694, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08003133535385132, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07378940284252167, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.07269247621297836, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06458479166030884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.061531975865364075, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0409056656062603, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.035410571843385696, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.034441277384757996, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03422020375728607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.020452044904232025, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.017898239195346832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.01782882958650589, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.016473183408379555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.016332346946001053, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.010825674049556255, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01108593586832285, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.010508660227060318, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00767656322568655, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08003133535385132, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08003133535385132, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.33.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.22624942660331726, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21261882781982422, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.20829997956752777, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.18960249423980713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10651905834674835, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10160032659769058, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11889275908470154, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.109520323574543, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1080160140991211, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09604798257350922, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09152290225028992, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.060465600341558456, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05235527083277702, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.050991978496313095, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.050664350390434265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030209163203835487, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026019278913736343, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.025926511734724045, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02385892905294895, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.023647604510188103, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015668556094169617, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015391333028674126, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015208040364086628, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00976736843585968, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.109520323574543, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.109520323574543, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.33.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2318650335073471, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.20660780370235443, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.19547609984874725, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.17462970316410065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.10660325735807419, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09596294164657593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1287674903869629, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1178298369050026, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1106962263584137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09210461378097534, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08738178014755249, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06577048450708389, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05663928762078285, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.051547713577747345, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05028393864631653, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03315843269228935, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02725362405180931, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.026915093883872032, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02443198673427105, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.023633720353245735, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018050577491521835, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018084142357110977, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016443977132439613, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012805962935090065, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1106962263584137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1106962263584137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.34.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11229798942804337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10532274842262268, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10230038315057755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0929977223277092, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05268384888768196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.049768850207328796, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06053387373685837, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05556124076247215, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05344622582197189, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04752308130264282, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04552134498953819, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030871683731675148, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026631919667124748, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02530525252223015, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024980032816529274, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015463591553270817, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01309509202837944, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012963092885911465, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012059901840984821, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011862363666296005, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008156397379934788, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008145025931298733, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007736228406429291, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0054862950928509235, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11229798942804337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11229798942804337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.34.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09661154448986053, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09061157703399658, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08762983232736588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07956503331661224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.045238249003887177, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.042561378329992294, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.052456460893154144, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04833787679672241, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04590144380927086, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04077955335378647, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03901733085513115, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.026657862588763237, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023114431649446487, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.021708732470870018, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02136535942554474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013338898308575153, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011198105290532112, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0110422782599926, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010299012996256351, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010081022046506405, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006985126994550228, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006954646669328213, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006554551888257265, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0045718420296907425, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09661154448986053, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09661154448986053, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.34.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2318457067012787, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2178063988685608, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21291500329971313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1936500072479248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10907568037509918, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10377718508243561, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12346480041742325, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11306653171777725, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11062917113304138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09835392236709595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09399395436048508, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06288019567728043, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.054005950689315796, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05218888446688652, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05176572874188423, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031379688531160355, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026604844257235527, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026445899158716202, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024387596175074577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024103760719299316, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016181722283363342, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0157609973102808, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015469009056687355, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009880537167191505, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11306653171777725, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11306653171777725, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.34.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1889699399471283, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.16466990113258362, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15439586341381073, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12851972877979279, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.08845969289541245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07799414545297623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10664258152246475, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09679808467626572, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09094788879156113, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07099142670631409, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06456509977579117, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.055057696998119354, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.047057878226041794, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04329929128289223, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.042379409074783325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.027898667380213737, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.023588722571730614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023231875151395798, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02034292183816433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019754605367779732, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015848007053136826, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01641182042658329, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014718669466674328, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.012631762772798538, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10664258152246475, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10664258152246475, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.34.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1556427776813507, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1462869495153427, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.14329397678375244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.13054093718528748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07336186617612839, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.06998996436595917, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08202920854091644, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07548120617866516, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.07440748065710068, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06618543714284897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06319922208786011, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04190831258893013, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03622591495513916, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03526705130934715, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03504073992371559, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.020987726747989655, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.018380293622612953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.018313761800527573, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.016951408237218857, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01681089587509632, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.011193851940333843, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01144965086132288, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01088394783437252, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008019447326660156, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08202920854091644, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08202920854091644, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.34.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.22581890225410461, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21237638592720032, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.20812328159809113, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.18952354788780212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10623163729906082, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1014247015118599, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11867798119783401, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1091674342751503, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10771635174751282, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09588439017534256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09154161065816879, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.060475632548332214, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.052197132259607315, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05087476596236229, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.050561629235744476, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030240539461374283, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02598876692354679, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02590186893939972, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.023860802873969078, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02366030402481556, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01580483838915825, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01541211549192667, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015362692065536976, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009848540648818016, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1091674342751503, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1091674342751503, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.34.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.23005002737045288, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.20373691618442535, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.19179482758045197, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1713561713695526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.10572695732116699, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09434092044830322, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1283455193042755, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1177356094121933, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.10987148433923721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09075171500444412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08624646812677383, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06561428308486938, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05653536319732666, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05110738426446915, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.049762751907110214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.0328955203294754, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02695171907544136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02656341902911663, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.024020539596676826, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.023159360513091087, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.017619863152503967, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.017868489027023315, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.015873095020651817, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012519140727818012, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.10987148433923721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.10987148433923721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.35.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10650178045034409, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09977240860462189, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09674955904483795, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08789495378732681, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04983773082494736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.047008149325847626, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0573679655790329, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.052745163440704346, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.050586212426424026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0448724590241909, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04294703155755997, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.029155317693948746, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.025231383740901947, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0239174272865057, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02359449677169323, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014615391381084919, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012334596365690231, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012196964584290981, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011330993846058846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011128921993076801, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007668523117899895, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007616616785526276, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007244455628097057, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005030982196331024, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10650178045034409, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10650178045034409, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.35.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09214611351490021, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.0863029882311821, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08333692699670792, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07565993070602417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.043074123561382294, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04043305665254593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05013109743595123, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0461898110806942, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04371602460741997, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.03877861052751541, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.037125471979379654, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0254604984074831, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.022083261981606483, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.020669028162956238, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.020328238606452942, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012740333564579487, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0106623824685812, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010505815967917442, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.00979769229888916, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009581713937222958, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00668009277433157, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006635791622102261, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006251306273043156, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004357615020126104, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09214611351490021, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09214611351490021, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.35.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2272682636976242, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.21314360201358795, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.20807017385959625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1891559660434723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10664709657430649, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.1012396365404129, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12103761732578278, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11085546761751175, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10820053517818451, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09603893756866455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09173578023910522, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06158033385872841, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05295069143176079, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05102355405688286, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05056137219071388, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030756741762161255, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026011187583208084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.025837861001491547, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.023817922919988632, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.023519964888691902, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015879200771450996, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015457980334758759, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015135887078940868, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009685372933745384, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11085546761751175, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11085546761751175, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.35.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21538443863391876, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18844643235206604, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.17913660407066345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.14488324522972107, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10072347521781921, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09045641869306564, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11807361245155334, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10786733031272888, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10406141728162766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08083450794219971, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06961682438850403, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.061093755066394806, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05255592241883278, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0493113175034523, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04851997643709183, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030834738165140152, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02687220089137554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026655474677681923, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02319774404168129, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.022694066166877747, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01724729873239994, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.018571484833955765, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0162530317902565, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01445676013827324, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10786733031272888, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10786733031272888, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.35.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1600504219532013, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.15034011006355286, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.14727738499641418, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.13418060541152954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0753559023141861, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07185475528240204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.084196537733078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07752373069524765, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.07641702890396118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06801242381334305, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06492205709218979, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04297159984707832, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03719593584537506, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03620629385113716, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03597681224346161, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02154015377163887, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.018826164305210114, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.018762506544589996, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01735357940196991, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01720980741083622, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.011485234834253788, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.011662420816719532, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011166011914610863, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008101885206997395, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.084196537733078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.084196537733078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.35.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2264474332332611, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21289852261543274, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.20859810709953308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19009441137313843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10643362998962402, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10156705975532532, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11904007196426392, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1093822494149208, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10794158279895782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09610309451818466, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09185904264450073, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06049443408846855, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05231741443276405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05097757279872894, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.050668273121118546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030266596004366875, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026107344776391983, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026013702154159546, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.023980164900422096, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.023779181763529778, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015810633078217506, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015563694760203362, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015368052758276463, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010054790414869785, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1093822494149208, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1093822494149208, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.35.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24081172049045563, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.21333318948745728, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.20180149376392365, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18024331331253052, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11071889847517014, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09940443933010101, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13368964195251465, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12184479832649231, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11509627103805542, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.0950772687792778, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09016113728284836, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0683070495724678, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05859862640500069, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05357634276151657, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.052325766533613205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03442069888114929, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.028348177671432495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02804694138467312, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025316176936030388, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024529682472348213, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018783891573548317, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01881507784128189, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017173580825328827, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013416562229394913, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11509627103805542, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11509627103805542, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.36.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10569630563259125, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09901043027639389, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09612338244915009, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0872403234243393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04944896697998047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0466887541115284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05691821873188019, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0522715225815773, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05019062012434006, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.044532619416713715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04264970123767853, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02898893691599369, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.024992255493998528, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02371939830482006, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02341587096452713, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01451253704726696, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012231987901031971, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012101968750357628, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011238389648497105, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011048020794987679, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007613360416144133, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007541951723396778, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007207801565527916, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004986634943634272, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10569630563259125, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10569630563259125, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.36.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09089286625385284, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08513036370277405, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08209144324064255, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07449714094400406, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.042441967874765396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.03980925306677818, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.04965486377477646, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.045730214565992355, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04308213293552399, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.03824693337082863, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.03659415990114212, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02520216442644596, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.021850483492016792, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.020371049642562866, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02001556009054184, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012624870054423809, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010530268773436546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010360651649534702, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.009682106785476208, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009455712512135506, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0066402368247509, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006594706792384386, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006189135834574699, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004357500001788139, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09089286625385284, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09089286625385284, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.36.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23388274013996124, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.21940642595291138, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.21439068019390106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19499541819095612, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10984563827514648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10439547151327133, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12426748126745224, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.113767609000206, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.111439049243927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.09895133227109909, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09443077445030212, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06324238330125809, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.054364703595638275, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05252384394407272, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.0520891435444355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03157158941030502, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026725104078650475, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02657509408891201, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02447514981031418, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024194277822971344, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016255594789981842, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01577744632959366, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015537526458501816, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009805130772292614, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.113767609000206, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.113767609000206, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.36.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.19582872092723846, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1652260720729828, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1538207232952118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1241692379117012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.08952295035123825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07788916677236557, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10864299535751343, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09932149201631546, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09410839527845383, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.06893081963062286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06328332424163818, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05607329308986664, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.048399586230516434, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.044032804667949677, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.042950090020895004, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028436876833438873, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024294408038258553, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023969408124685287, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020553596317768097, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019864004105329514, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016128458082675934, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017306441441178322, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014834575355052948, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01358198281377554, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10864299535751343, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10864299535751343, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.36.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1582675278186798, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1486913114786148, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.145651176571846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1327265501022339, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07451318204402924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07110203802585602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08311893790960312, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07662470638751984, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.07556256651878357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06723777949810028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06416875123977661, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.042382750660181046, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03672267496585846, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03575662896037102, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03553472459316254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.021190060302615166, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.018513215705752373, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.01844688318669796, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01705138385295868, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.016910461708903313, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01115849707275629, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.011350248008966446, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.010843333788216114, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00773972412571311, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08311893790960312, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08311893790960312, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.36.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.22171129286289215, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2084948718547821, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.20441950857639313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.18622630834579468, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10428129881620407, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09954511374235153, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11629112809896469, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10712124407291412, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10571523010730743, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0941569060087204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08985394984483719, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.059165894985198975, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05117213726043701, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04987926781177521, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04957466199994087, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.029529763385653496, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.025424696505069733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02533898502588272, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.023339154198765755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.023142075166106224, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015295629389584064, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014993447810411453, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01485925167798996, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009466354735195637, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11629112809896469, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11629112809896469, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.36.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.23443660140037537, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.20730815827846527, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.19582276046276093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.17504316568374634, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.10755370557308197, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.0963202714920044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13008712232112885, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.11876797676086426, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11178421229124069, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09235920757055283, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08779232203960419, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06646983325481415, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.057095516473054886, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05199044197797775, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.050722453743219376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03345243260264397, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.027449505403637886, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.027112653478980064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.024491049349308014, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.023680854588747025, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01819472759962082, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018170341849327087, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01656033843755722, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012822899036109447, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11178421229124069, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11178421229124069, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.37.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10881251841783524, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1021774411201477, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09941411763429642, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0904574915766716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0510198250412941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0483274906873703, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.058288589119911194, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05363529548048973, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.051738958805799484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04607653617858887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04405508562922478, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.029629522934556007, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.025637594982981682, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.024445073679089546, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024156415835022926, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01483693066984415, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012533646076917648, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012413689866662025, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011525556445121765, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011343475431203842, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00774925434961915, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0076091778464615345, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007362097967416048, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0049050115048885345, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10881251841783524, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10881251841783524, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.37.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09648720175027847, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09065274894237518, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08794983476400375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08004961907863617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.045228008180856705, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04271354526281357, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0521651990711689, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04793871194124222, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04586748778820038, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04086863994598389, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0391744002699852, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.026515508070588112, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.022902343422174454, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0216837078332901, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021384967491030693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013258436694741249, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011124825105071068, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010995869524776936, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010233459062874317, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010050375945866108, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0069232843816280365, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006792081985622644, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006538300309330225, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004381197039037943, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09648720175027847, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09648720175027847, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.37.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23671013116836548, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22260446846485138, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2178075760602951, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19850286841392517, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11137409508228302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10617991536855698, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12549744546413422, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11504102498292923, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11294467747211456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10065258294343948, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09619680047035217, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06390337646007538, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05497003346681595, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.053272590041160583, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.0528595969080925, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031875383108854294, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027100160717964172, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02696160599589348, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024876518175005913, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024615759029984474, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01641898974776268, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015953823924064636, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015760881826281548, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009923025965690613, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11504102498292923, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11504102498292923, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.37.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21750733256340027, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.17424385249614716, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15608659386634827, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12434665858745575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09926233440637589, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08112665265798569, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1245737373828888, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11380724608898163, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10482846945524216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0721893310546875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06553781032562256, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06429190933704376, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.055108197033405304, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.048543985933065414, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.046894319355487823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032444097101688385, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026416966691613197, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.025844939053058624, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.021466227248311043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.020341357216238976, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.018040413036942482, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.018694546073675156, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016011398285627365, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01407425943762064, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11380724608898163, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11380724608898163, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.37.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1692952811717987, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1592327058315277, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1560613214969635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14224828779697418, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07976817339658737, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07613852620124817, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08893550932407379, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08195637911558151, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08085444569587708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07202523946762085, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06873037666082382, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.045350972563028336, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03926630690693855, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.038262080401182175, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.038024406880140305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.022673657163977623, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.019786009564995766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.019719578325748444, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01822887361049652, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01808329112827778, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01193934679031372, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01209797989577055, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.011609245091676712, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008223053067922592, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08893550932407379, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08893550932407379, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.37.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.22983574867248535, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21629385650157928, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2121008038520813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19330032169818878, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10807029902935028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10321684181690216, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12072685360908508, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11095396429300308, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10954245924949646, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09766970574855804, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0933346152305603, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.061409879475831985, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.053036127239465714, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05171876773238182, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.051409076899290085, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03073166310787201, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026419276371598244, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026329554617404938, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02427150122821331, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02407056838274002, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016071928665041924, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015644092112779617, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015629133209586143, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009992185980081558, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11095396429300308, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11095396429300308, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.37.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.23617087304592133, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.20903952419757843, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.19703897833824158, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.17609895765781403, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.10845893621444702, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09694711118936539, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1320473700761795, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12050569802522659, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11284329742193222, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09321323037147522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08864398300647736, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06722156703472137, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05797404423356056, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.052505191415548325, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05114461109042168, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03377864137291908, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02782081626355648, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.0274564940482378, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.024863239377737045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.023998841643333435, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018192503601312637, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018572255969047546, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016421550884842873, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013206391595304012, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11284329742193222, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11284329742193222, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.38.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11032882332801819, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10334211587905884, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10039495676755905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09118489921092987, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05164935439825058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04877295717597008, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05912915617227554, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.054427001625299454, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05242947116494179, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04647713527083397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04439827427268028, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030046828091144562, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0260161105543375, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.024750830605626106, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024447571486234665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015054818242788315, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012723145075142384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012593179941177368, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011672313325107098, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011481798253953457, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007873914204537868, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007772753480821848, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007457515690475702, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005059335380792618, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11032882332801819, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11032882332801819, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.38.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09555605053901672, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.0895405039191246, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08666285127401352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07872168719768524, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.044733885675668716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.042104706168174744, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05186908319592476, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0476851686835289, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04541924595832825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04031874239444733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0385933592915535, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.026345891878008842, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02278869040310383, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.021450696513056755, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.021128444001078606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013182378374040127, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011035664938390255, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010891963727772236, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01013612374663353, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009931844659149647, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006890242453664541, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.006801434326916933, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006470047868788242, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004424781538546085, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09555605053901672, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09555605053901672, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.38.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23651833832263947, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22199837863445282, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2170274555683136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.19731906056404114, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11127933859825134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10588949918746948, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12567567825317383, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11504338681697845, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11289341002702713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10025976598262787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09571098536252975, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06394609063863754, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05494853854179382, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.053228117525577545, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05282098054885864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03191947564482689, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02708921954035759, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026953309774398804, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02480607107281685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02454325184226036, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016425540670752525, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01598178781569004, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01575411483645439, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009972605854272842, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11504338681697845, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11504338681697845, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.38.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.19355395436286926, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1706250160932541, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16356678307056427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1312628984451294, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09080886840820312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08282173424959183, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10382044315338135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0949380174279213, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09306536614894867, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07121173292398453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06283992528915405, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05352367088198662, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.045867107808589935, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.043993934988975525, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.043568067252635956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.026891840621829033, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02334979921579361, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023236384615302086, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.019581733271479607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019289011135697365, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.014730171300470829, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015210249461233616, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014154968783259392, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0112095195800066, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10382044315338135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10382044315338135, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.38.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.17803272604942322, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.16742432117462158, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1640855073928833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.14957354962825775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08387947827577591, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08005782216787338, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09348545223474503, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08618311583995819, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08504406362771988, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07575232535600662, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07228501886129379, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0477033406496048, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04129907861351967, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.040242720395326614, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03999164700508118, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.023853745311498642, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02079804614186287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02072376385331154, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01916048675775528, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01900496333837509, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012570216320455074, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.012707548215985298, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012227851897478104, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008615823462605476, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09348545223474503, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09348545223474503, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.38.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2331112027168274, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2192510962486267, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21492999792099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19580121338367462, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.1096402257680893, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10469334572553635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12223057448863983, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11259078234434128, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1111510619521141, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09908431768417358, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09447783976793289, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.062159448862075806, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.053789395838975906, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.052445121109485626, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.052120424807071686, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03104090318083763, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026716237887740135, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026625484228134155, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02453087456524372, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02432493306696415, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016071109101176262, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015721306204795837, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015624548308551311, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009889539331197739, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11259078234434128, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11259078234434128, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.38.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.23929589986801147, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2121513932943344, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.20017653703689575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.17878101766109467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1099826842546463, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09847769141197205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13304118812084198, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12201713770627975, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11437489837408066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09465663135051727, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.08979224413633347, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06780765950679779, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.058701012283563614, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.053221192210912704, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05185085907578468, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.034183405339717865, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02813602425158024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.027763022109866142, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025155099108815193, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02428257092833519, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018534697592258453, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01871466636657715, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016807451844215393, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013230686075985432, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11437489837408066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11437489837408066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.39.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11309516429901123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10601469874382019, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10292338579893112, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09351100027561188, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0530407540500164, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05006451532244682, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06103897839784622, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05602231249213219, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05380597710609436, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04774288833141327, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04571100324392319, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.031061120331287384, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02680266834795475, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.025446487590670586, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.025125538930296898, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015569353476166725, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013127842918038368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012991057708859444, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012063424102962017, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011858521029353142, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008185303770005703, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00809562299400568, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00773985031992197, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00534966541454196, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11309516429901123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11309516429901123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.39.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09951213002204895, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09326431155204773, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0904214009642601, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0821305438876152, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.046621836721897125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04395236074924469, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.053843479603528976, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04947401210665703, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04732486233115196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04200448840856552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04019923135638237, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.027349097654223442, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023646121844649315, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022352105006575584, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.022041313350200653, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013683023862540722, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011465811170637608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01132919266819954, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010519588366150856, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010324884206056595, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007116592489182949, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0070015559904277325, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006705762818455696, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004500860348343849, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09951213002204895, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09951213002204895, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.39.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23699568212032318, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22245930135250092, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2175218015909195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1978118121623993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11149266362190247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10605712234973907, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12596286833286285, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11533360928297043, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1131068766117096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10048381984233856, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09598005563020706, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06405849754810333, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05509517714381218, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05332911014556885, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05291347578167915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03196876123547554, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027122674509882927, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026979925110936165, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024829743430018425, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.024563586339354515, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01645401120185852, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01596541702747345, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.015773985534906387, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009891711175441742, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11533360928297043, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11533360928297043, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.39.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.17108869552612305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.14020490646362305, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1310471147298813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10776151716709137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0776987075805664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.06767868995666504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0921332985162735, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.08380546420812607, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.08132907748222351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05973074212670326, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05403880402445793, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.04730096831917763, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.041128963232040405, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.038330331444740295, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.03766443952918053, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.024236150085926056, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02151389792561531, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.021366870030760765, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.018375953659415245, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01795404963195324, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01408512145280838, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015536176972091198, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.013287227600812912, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.012639053165912628, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10776151716709137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10776151716709137, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.39.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.18579144775867462, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.17465002834796906, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.17112311720848083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.15585386753082275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08754433691501617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08350999653339386, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09765207022428513, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09000363200902939, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08879654854536057, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07902911305427551, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07535456120967865, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.049824170768260956, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04312116280198097, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04200133681297302, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04174065589904785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.024917498230934143, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021690944209694862, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.021616017445921898, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01996631734073162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.019799426198005676, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013123602606356144, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013225371018052101, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012758642435073853, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.008923333138227463, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09765207022428513, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.09765207022428513, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.39.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.23814824223518372, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2239091843366623, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.21948310732841492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20001783967018127, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11208375543355942, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10701266676187515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12518854439258575, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11515147238969803, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11366075277328491, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10123898088932037, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09665662050247192, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06382319331169128, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.055086899548769, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05369339883327484, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.053360894322395325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03192434087395668, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027486160397529602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02739192359149456, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02525850012898445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02505011111497879, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016742922365665436, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016391487792134285, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016281718388199806, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010595527477562428, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11515147238969803, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11515147238969803, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.39.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.245829239487648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.21933519840240479, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.20838148891925812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1863517314195633, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11306897550821304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10227321088314056, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1346772015094757, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12376111000776291, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11730141937732697, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09772659838199615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09263864904642105, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06877508759498596, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.059291571378707886, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.054430436342954636, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05323249101638794, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03450392931699753, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.028245585039258003, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02794996276497841, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025168156251311302, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024390073493123055, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018345197662711143, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.017992407083511353, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01677691377699375, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.011981295421719551, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11306897550821304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11306897550821304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.40.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11561616510152817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10831817984580994, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10521072149276733, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09562620520591736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05422212556004524, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05122314393520355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.062133874744176865, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.057148855179548264, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.055035270750522614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04880357161164284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04665112867951393, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03158523514866829, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02731488272547722, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02599194273352623, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.025675121694803238, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015811510384082794, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013363251462578773, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013227115385234356, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012265090830624104, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012065120972692966, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008278071880340576, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008166098967194557, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007851813919842243, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0053210994228720665, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11561616510152817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11561616510152817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.40.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10052650421857834, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09422217309474945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0913129597902298, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08300862461328506, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.047162044793367386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04443845897912979, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.054601799696683884, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05013430863618851, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04785778373479843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04250315576791763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04068572819232941, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02774755284190178, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.023971382528543472, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.022620180621743202, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.022292926907539368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.013884913176298141, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011618342250585556, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011468455195426941, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010666928254067898, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010456381365656853, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0072451308369636536, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0071167657151818275, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00681558670476079, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004590342286974192, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10052650421857834, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10052650421857834, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.40.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2427184283733368, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22780589759349823, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22270487248897552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20270377397537231, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11431743949651718, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10875049233436584, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12896087765693665, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11813647300004959, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11597979068756104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10300934314727783, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09835577756166458, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06564252823591232, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05644332617521286, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05467839539051056, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05426129326224327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03276274725794792, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.027806676924228668, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02766578644514084, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02545849047601223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025188401341438293, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016837188974022865, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016353068873286247, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016152117401361465, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010131294839084148, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11597979068756104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11597979068756104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.40.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.20085790753364563, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.16771367192268372, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1528388410806656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12408449500799179, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09176354110240936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07801426947116852, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11552168428897858, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10562992095947266, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09669774770736694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0699266642332077, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06581825762987137, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05974895879626274, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05102863162755966, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0446866899728775, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.043075885623693466, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030112069100141525, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.023956065997481346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023379649966955185, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.019831586629152298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.018752943724393845, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016586309298872948, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016623638570308685, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01463271677494049, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01205171924084425, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11552168428897858, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11552168428897858, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.40.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.19211697578430176, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1806376874446869, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1770230531692505, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.16122332215309143, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09056854248046875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08643238246440887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10109780728816986, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09307855367660522, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09183099120855331, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08174723386764526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07804901897907257, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0516078881919384, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.044582005590200424, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04343866556882858, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0431739017367363, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02582424320280552, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.022422946989536285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02235194481909275, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.020641637966036797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.020468661561608315, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013636579737067223, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013660403899848461, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.013266272842884064, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00920281745493412, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10109780728816986, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10109780728816986, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.40.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24021212756633759, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2260178178548813, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22154958546161652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20178934931755066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11312691867351532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1079864427447319, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12620210647583008, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11619371920824051, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11469117552042007, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10213764756917953, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09747768938541412, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0642412081360817, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.055549319833517075, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05414995178580284, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.053816817700862885, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03209850192070007, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02765815332531929, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027565162628889084, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025395067408680916, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02518058754503727, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01671229861676693, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016395412385463715, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01625284180045128, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010465292260050774, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11619371920824051, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11619371920824051, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.40.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24839109182357788, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22170856595039368, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21073324978351593, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18804511427879333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11433747410774231, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10348289459943771, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1363014280796051, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12506599724292755, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11855512112379074, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09870882332324982, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09349862486124039, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0693947821855545, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05983458831906319, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.054993197321891785, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.053799405694007874, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03472931310534477, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.028467213734984398, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02817646972835064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02531968615949154, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024544423446059227, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018308185040950775, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018020199611783028, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01672876626253128, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.011861005797982216, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11433747410774231, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11433747410774231, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.41.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11574751138687134, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10842996835708618, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10521034896373749, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09550899267196655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.054298534989356995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05125650018453598, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06259119510650635, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05738159269094467, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.055132102221250534, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04883630946278572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04671522602438927, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03182203322649002, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.027434810996055603, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.026049412786960602, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02571331337094307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015930768102407455, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013383403420448303, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013240969739854336, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012265854515135288, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0120515963062644, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008324522525072098, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008178884163498878, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00786957424134016, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005305287428200245, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11574751138687134, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11574751138687134, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.41.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10255484282970428, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09604985266923904, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09299544990062714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08443538099527359, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04805895313620567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.045257583260536194, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05588746815919876, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.051127370446920395, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04879099130630493, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04321964457631111, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04144753888249397, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.028343496844172478, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.024430660530924797, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.023049287497997284, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02271794155240059, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014191052876412868, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.011839953251183033, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011693897657096386, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.010856159031391144, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010644330643117428, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0074250889010727406, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007258368190377951, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006988861132413149, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004688997752964497, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10255484282970428, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10255484282970428, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.41.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24369598925113678, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22841651737689972, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2232581079006195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20279133319854736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11474207043647766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10910812020301819, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12972378730773926, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1187371090054512, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11649968475103378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10320143401622772, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09848542511463165, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06605353206396103, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.056749388575553894, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.054935675114393234, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05448849871754646, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032984666526317596, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028007101267576218, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027869483456015587, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025612950325012207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025341499596834183, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017027191817760468, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01660063862800598, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01631823368370533, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01045935321599245, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11649968475103378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11474207043647766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.41.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21418944001197815, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1692376583814621, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15348146855831146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12688547372817993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09818554669618607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08017196506261826, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11842223256826401, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10783764719963074, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10320873558521271, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07071896642446518, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06488925218582153, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06109091639518738, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05249115079641342, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04814011976122856, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04706435278058052, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03089313581585884, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02627817541360855, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.025996794924139977, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02127065323293209, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02054782211780548, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017349692061543465, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.018366338685154915, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01604018174111843, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014182490296661854, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10783764719963074, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10783764719963074, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.41.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.19580551981925964, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.18408071994781494, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.18036726117134094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.16436178982257843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09230894595384598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08807513117790222, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10292297601699829, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09488882124423981, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09359142184257507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08328311145305634, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07939954847097397, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0525033175945282, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04542364552617073, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04426322504878044, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04397839307785034, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.0262394268065691, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.022785300388932228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.022706951946020126, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02094450034201145, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.020776279270648956, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013755780644714832, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01378735899925232, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.013372515328228474, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009176569059491158, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10292297601699829, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10292297601699829, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.41.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2413652092218399, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22693099081516266, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2224556803703308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2026272863149643, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11365523189306259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10846621543169022, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12684768438339233, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1167764738202095, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11525803059339523, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10257858783006668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0978371873497963, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06454039365053177, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05579908564686775, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.0543791837990284, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05404948443174362, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03224724158644676, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.027715561911463737, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02761654742062092, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025418663397431374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02520540915429592, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01675010286271572, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01632722094655037, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01628362014889717, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01028397399932146, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1167764738202095, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11525803059339523, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.41.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24916602671146393, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2230042964220047, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21197353303432465, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18910935521125793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11497397720813751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10417119413614273, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1371074616909027, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12589259445667267, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11910072714090347, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.099507637321949, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09421054273843765, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07012126594781876, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.0604902021586895, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05551718547940254, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05427604168653488, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.035333290696144104, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029075080528855324, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.028749581426382065, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026009924709796906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025220589712262154, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018948450684547424, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01887126825749874, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01731140725314617, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013003120198845863, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11497397720813751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11497397720813751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.42.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12478983402252197, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11704415082931519, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11379218846559525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1034710630774498, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05863513797521591, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.055456649512052536, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06698747724294662, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.061654701828956604, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05950656533241272, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05280407518148422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05043013021349907, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03404582291841507, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029462778940796852, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02810455486178398, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027773777022957802, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017023218795657158, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014398363418877125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01426524668931961, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013205003924667835, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012994813732802868, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008863506838679314, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008711406029760838, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008419377729296684, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005587822292000055, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11379218846559525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11379218846559525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.42.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10906823724508286, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10224400460720062, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09924732148647308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09026017040014267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05125037208199501, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04840131476521492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0590229406952858, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05416256934404373, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.052046243101358414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.046199455857276917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.044213876128196716, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.029985057190060616, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02588466741144657, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02456543780863285, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024245968088507652, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0150011470541358, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01258174329996109, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012446252629160881, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011540385894477367, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011343110352754593, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007805901113897562, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007626653648912907, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007380322553217411, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004874585662037134, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10906823724508286, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10906823724508286, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.42.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24707463383674622, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23189669847488403, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22685043513774872, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.2065524011850357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11647767573595047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.1108427345752716, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1312587410211563, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12017041444778442, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11814871430397034, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10490517318248749, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10024203360080719, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06681796908378601, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05744076147675514, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.055720120668411255, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.0553031861782074, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.033349502831697464, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028324827551841736, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028191691264510155, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025929760187864304, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025660019367933273, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017161667346954346, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016636336222290993, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016482241451740265, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010286437347531319, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11647767573595047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11647767573595047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.42.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1985020935535431, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1678815633058548, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15651902556419373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1264379769563675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09047170728445053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07907130569219589, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10933016985654831, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10023696720600128, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09548915922641754, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07049200683832169, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06402405351400375, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05632074549794197, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04825732856988907, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04367519170045853, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.042554549872875214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028222864493727684, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.022840091958642006, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.022511109709739685, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0188945010304451, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01810428686439991, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015076948329806328, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.014902248047292233, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.013587597757577896, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010157231241464615, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10933016985654831, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10933016985654831, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.42.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20119403302669525, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.18893206119537354, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1851489394903183, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.16855807602405548, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09487055242061615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09047094732522964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10589800775051117, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.0975634753704071, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09623988717794418, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08554967492818832, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08153726160526276, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.054051753133535385, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.046716779470443726, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.045504212379455566, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04521583020687103, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.027021316811442375, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.0234342273324728, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02335105836391449, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02153286524116993, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02134673297405243, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014207221567630768, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014192420989274979, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.013811042532324791, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009454290382564068, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10589800775051117, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10589800775051117, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.42.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24351070821285248, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22879908978939056, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22421272099018097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20412535965442657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11470691114664078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1093946099281311, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12794357538223267, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11788204312324524, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11632741242647171, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.1034146100282669, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09857180714607239, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06511468440294266, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05633900687098503, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.0548931360244751, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.054546426981687546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03250535577535629, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02796744741499424, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027871333062648773, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02562894858419895, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.025408050045371056, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016838356852531433, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016471168026328087, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01635674387216568, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010357728227972984, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11632741242647171, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11470691114664078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.42.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25300315022468567, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22681385278701782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21607540547847748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19253243505954742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1168159544467926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10613010078668594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1385001391172409, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.127329021692276, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12080321460962296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10102228075265884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09541571140289307, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07050134241580963, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.060882117599248886, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05610785633325577, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05494041368365288, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03525904193520546, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.028933310881257057, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.028628189116716385, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025732634589076042, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024972403421998024, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018479960039258003, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018129566684365273, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.016914421692490578, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.011745119467377663, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1168159544467926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1168159544467926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.43.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12344594299793243, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11555614322423935, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11212942749261856, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10181453824043274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05796278268098831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05468616262078285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06664525717496872, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06125078722834587, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.058905333280563354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05211601406335831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.049744896590709686, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03389376774430275, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029303859919309616, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.027811355888843536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027446847409009933, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016990572214126587, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014298887923359871, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01415103767067194, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013107151724398136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012880036607384682, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008911068551242352, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00875002983957529, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008429720997810364, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005692724604159594, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11555614322423935, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11555614322423935, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.43.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10743986815214157, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1005263477563858, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09738066792488098, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08846064656972885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05041585490107536, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04746874421834946, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05841316282749176, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.053590819239616394, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05121965706348419, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.045327648520469666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.043326642364263535, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.029668906703591347, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02561439760029316, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02417946420609951, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.023841574788093567, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014850436709821224, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012411337345838547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012261420488357544, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011374985799193382, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01115366443991661, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007751328870654106, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007595803588628769, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00728601822629571, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004888217430561781, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10743986815214157, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10743986815214157, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.43.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24543902277946472, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22997206449508667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2247583419084549, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20422737300395966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11559518426656723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10989638417959213, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13082005083560944, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11968798190355301, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1174321249127388, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10393985360860825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09920845925807953, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06661482155323029, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05718330666422844, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05530285835266113, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05486968159675598, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03324982896447182, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02812081389129162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027972964569926262, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0256892628967762, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.0254062470048666, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017090342938899994, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016541453078389168, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016356226056814194, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010206041857600212, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11559518426656723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11559518426656723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.43.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.19495327770709991, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1566874384880066, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.14236649870872498, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11636931449174881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.08536355942487717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.06913977116346359, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10889080911874771, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09667054563760757, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09129359573125839, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.06520459055900574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.059995122253894806, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.054839037358760834, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04772556200623512, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04271702840924263, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.041439589112997055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028316032141447067, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024599172174930573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.024283260107040405, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02111588977277279, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02035108394920826, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016723908483982086, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.018542185425758362, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01531154289841652, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.015292887575924397, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11636931449174881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11636931449174881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.43.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20048217475414276, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1881490796804428, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.18422400951385498, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.16767767071723938, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09459589421749115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09011857211589813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10585805773735046, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09736091643571854, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09601520746946335, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08517498522996902, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08123654127120972, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05408614128828049, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.046658143401145935, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.045391954481601715, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.045093368738889694, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.027110904455184937, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02341747283935547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.023332253098487854, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02150825224816799, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.021317169070243835, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014367656782269478, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014245651662349701, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.013953213579952717, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009558378718793392, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10585805773735046, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10585805773735046, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.43.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2454511970281601, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23047977685928345, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2257329821586609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20550444722175598, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11565041542053223, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11023157835006714, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1294553577899933, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11899232119321823, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11738065630197525, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10420440882444382, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09938023239374161, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06590206176042557, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.056945718824863434, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05540984869003296, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05505138635635376, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03299616649746895, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.028352100402116776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02825421281158924, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02599264122545719, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02576489932835102, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01733699068427086, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016885053366422653, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016833381727337837, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01085547637194395, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11565041542053223, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11565041542053223, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.43.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2526020109653473, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22736574709415436, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21654076874256134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19354863464832306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1169378012418747, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10640048235654831, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1391175389289856, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1276969313621521, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12082041800022125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10168861597776413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09619713574647903, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07092396169900894, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.0613364651799202, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.056392136961221695, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05517971143126488, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03559314087033272, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.0294638704508543, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02912849187850952, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02642928622663021, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02565551921725273, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018833663314580917, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01899002306163311, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017202744260430336, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012983197346329689, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10168861597776413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10168861597776413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.44.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1273927092552185, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11941521614789963, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11613161116838455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10549867898225784, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05994603410363197, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05668468400835991, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0683915838599205, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06294235587120056, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06085580214858055, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.053915683180093765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05140673741698265, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03473152592778206, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.030073311179876328, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02871006168425083, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02838066965341568, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01736360602080822, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014678021892905235, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014547000639140606, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01343521662056446, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013224526308476925, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008991226553916931, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008830721490085125, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00853768177330494, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005601106211543083, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11613161116838455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11613161116838455, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.44.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11220575124025345, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10517093539237976, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10214740037918091, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09280502051115036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05277553200721741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04984287545084953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06058846786618233, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.055703870952129364, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.053572673350572586, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.047484032809734344, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0453927144408226, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.030806424096226692, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026617322117090225, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02530219778418541, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024986572563648224, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015410251915454865, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012953716330230236, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012822325341403484, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01186749804764986, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011666111648082733, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008009872399270535, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007844122126698494, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007582590915262699, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005004295147955418, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11220575124025345, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11220575124025345, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.44.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2508257329463959, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23528730869293213, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23013854026794434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20927605032920837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.118423692882061, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11258029192686081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13348214328289032, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12229713797569275, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12014055252075195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10653012990951538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10169035941362381, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06795745342969894, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.058477845042943954, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05665971338748932, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05622617155313492, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03392017260193825, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028795285150408745, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028652887791395187, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0263256523758173, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026044586673378944, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017424387857317924, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016917090862989426, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016731558367609978, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010458642616868019, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10653012990951538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10653012990951538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.44.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.20376187562942505, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.16333359479904175, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1462104171514511, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1214589923620224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09310171753168106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07388123124837875, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11663695424795151, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1065262109041214, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09831055998802185, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.06852222234010696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06500502675771713, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.060322001576423645, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.051566578447818756, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04543241485953331, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04388057067990303, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030420642346143723, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024408526718616486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023874124512076378, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.019791854545474052, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.018697494640946388, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01680471934378147, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01695350930094719, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014914397150278091, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01236847136169672, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11663695424795151, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11663695424795151, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.44.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20477363467216492, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1922997087240219, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1882864236831665, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.17145808041095734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09664599597454071, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09208816289901733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10807731002569199, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.0994681864976883, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09803970158100128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08707369118928909, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08305495232343674, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.055206429213285446, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.0476582795381546, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04637208953499794, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.046073269098997116, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.0276403296738863, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02393777295947075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02385287545621395, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.021986262872815132, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02180021069943905, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014609673991799355, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014576634392142296, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014193207025527954, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009800449945032597, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10807731002569199, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10807731002569199, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.44.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.248054638504982, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2329215258359909, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22814683616161346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20761564373970032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11690758913755417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1114291399717331, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13063080608844757, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12030366063117981, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11864069104194641, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10530208796262741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10033068805932999, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06647545844316483, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05750472843647003, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05596895143389702, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.055609483271837234, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03320249542593956, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02851865440607071, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.028414569795131683, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02611715719103813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.025884542614221573, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017211049795150757, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016821131110191345, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016695324331521988, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.0105877835303545, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11690758913755417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11690758913755417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.44.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2581538259983063, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23197965323925018, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22101861238479614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19731426239013672, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1194704994559288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10869047045707703, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1421566754579544, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13027627766132355, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12347859889268875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10367906093597412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09805171191692352, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07262980192899704, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06254927814006805, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05761319771409035, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.056394580751657486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.036630626767873764, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.030064858496189117, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.029736017808318138, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02691815048456192, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02613690495491028, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019756559282541275, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019321303814649582, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018158089369535446, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013165362179279327, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10367906093597412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10367906093597412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.45.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.13064537942409515, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12227295339107513, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11884142458438873, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10788217931985855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06145533546805382, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05802213400602341, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07046405225992203, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06464702636003494, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.062421124428510666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05516739934682846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05264698341488838, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03585793077945709, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03091602399945259, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.029459578916430473, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02911137044429779, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01795436441898346, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01510052103549242, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01495642401278019, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013808493502438068, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013584272935986519, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009383616968989372, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009146396070718765, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008888838812708855, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005866219289600849, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10788217931985855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10788217931985855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.45.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11401069164276123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10671605169773102, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10353662073612213, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09396953880786896, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.053640685975551605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05056456848978996, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06183723732829094, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05672289431095123, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05448652431368828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04818820580840111, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.046052929013967514, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03145325928926468, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.027134602889418602, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02573074772953987, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.025392848998308182, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015739671885967255, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013187897391617298, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013041087426245213, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012065766379237175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011849637143313885, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008204364217817783, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008021646179258823, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007746824063360691, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005127242300659418, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11401069164276123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11401069164276123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.45.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2518177628517151, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23581917583942413, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23042984306812286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20920869708061218, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11875512450933456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11281117051839828, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13404209911823273, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12279021739959717, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1206149160861969, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10658567398786545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10168996453285217, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06827890127897263, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05867850407958031, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05683644488453865, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05638958141207695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.034089528024196625, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028901921585202217, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.0287675429135561, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.026378270238637924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02609836310148239, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017536580562591553, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017011694610118866, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01682169735431671, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010552420280873775, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10658567398786545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10658567398786545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.45.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2199643850326538, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.17587190866470337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1582554131746292, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12868131697177887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09720572084188461, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08060714602470398, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12453009933233261, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11356877535581589, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10602845251560211, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07382968813180923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06959255039691925, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06418299674987793, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.054712072014808655, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04721488431096077, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04530283436179161, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032179828733205795, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.025142911821603775, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02463809959590435, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020687464624643326, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019357789307832718, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017375662922859192, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017367081716656685, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014959927648305893, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.012240960262715816, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11356877535581589, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11356877535581589, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.45.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20350095629692078, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.19093690812587738, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.18690899014472961, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1700599193572998, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09611519426107407, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0915205180644989, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10760083049535751, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09898383170366287, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09755014628171921, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0865178257226944, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08249673247337341, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.054986048489809036, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.0474415048956871, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04613090306520462, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04582339525222778, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02751326747238636, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02381310798227787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02372586913406849, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.021863123401999474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.021665850654244423, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014541528187692165, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014507723040878773, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014109869487583637, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009753121063113213, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10760083049535751, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10760083049535751, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.45.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.248912513256073, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2335754632949829, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22869673371315002, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20814841985702515, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11743959039449692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11183951050043106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13142533600330353, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12091320753097534, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11917833238840103, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10572557151317596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10077807307243347, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06708702445030212, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.057855140417814255, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05627885460853577, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05590783432126045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03351543843746185, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.028817271813750267, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02871103771030903, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.026407917961478233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.0261694323271513, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01756865158677101, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.0171971395611763, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01704992912709713, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011105988174676895, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10572557151317596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10572557151317596, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.45.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2609421908855438, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2344026416540146, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22316646575927734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1992260217666626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12084335833787918, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10983017832040787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14338579773902893, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13198734819889069, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12485580146312714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.1048332080245018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09905355423688889, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07319177687168121, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06338774412870407, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05829652026295662, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.057039014995098114, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03677283972501755, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03044796735048294, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.03012785129249096, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02726196125149727, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.026491032913327217, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019581537693738937, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019622648134827614, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01796281524002552, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.01346815750002861, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.1048332080245018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.1048332080245018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.46.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12829869985580444, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12013913691043854, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11671867221593857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1059454157948494, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.060361601412296295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.057002224028110504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0690985769033432, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06361419707536697, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06132151186466217, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0542471781373024, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.051727890968322754, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03512723743915558, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.030416300520300865, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02893761359155178, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.028597120195627213, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01757386513054371, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014830315485596657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014683726243674755, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01357379648834467, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013345213606953621, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00915563851594925, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00898911152034998, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008675344288349152, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005759371444582939, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11671867221593857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11671867221593857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.46.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1127588227391243, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10559992492198944, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10243920981884003, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09300622344017029, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.053055476397275925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.050038158893585205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06108083948493004, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05612679198384285, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.053890034556388855, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04770158231258392, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04554985836148262, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.031066762283444405, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026840267702937126, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02545059472322464, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.025118539109826088, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01555365789681673, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013064905069768429, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012927509844303131, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011974824592471123, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011766359210014343, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008118991740047932, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007980059832334518, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007677912246435881, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005158125888556242, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1127588227391243, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1127588227391243, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.46.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.25181466341018677, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23599934577941895, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2306831032037735, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20955829322338104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11877308785915375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11292750388383865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13407588005065918, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12283126264810562, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12059519439935684, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10678448528051376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10181969404220581, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06826410442590714, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0587078258395195, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.056865133345127106, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05640937387943268, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03406323492527008, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028917208313941956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02877584658563137, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.026417793706059456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026139123365283012, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017530212178826332, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017022350803017616, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016817955300211906, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010554933920502663, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10678448528051376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10678448528051376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.46.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.20807905495166779, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.16857510805130005, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15031373500823975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12379138916730881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0957166776061058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07699814438819885, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12151023745536804, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11075769364833832, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10013493150472641, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0696505606174469, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06696908921003342, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06264209747314453, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05339757725596428, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.046531468629837036, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04480106756091118, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03133420646190643, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02482886239886284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02412257343530655, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.019904352724552155, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01867520995438099, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016762996092438698, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017128892242908478, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014481640420854092, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.012208294123411179, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11075769364833832, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11075769364833832, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.46.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20350977778434753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.19091413915157318, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.18682752549648285, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.16990473866462708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09615544229745865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09152098000049591, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10759419202804565, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09903339296579361, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09760592132806778, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0864928811788559, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08236382156610489, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05497928708791733, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.047474972903728485, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04615875333547592, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04585083946585655, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.027505192905664444, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02383469045162201, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.023746702820062637, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.021873081102967262, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.021674612537026405, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014520143158733845, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014532229863107204, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014087572693824768, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009770853444933891, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10759419202804565, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10759419202804565, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.46.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2503821551799774, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23489251732826233, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.23000773787498474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2091292142868042, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.1181136891245842, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11243706941604614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1320633441209793, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12161240726709366, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11988940834999084, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10621333122253418, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10115630179643631, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0672854632139206, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.058157868683338165, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05656272917985916, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05618518218398094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.033592868596315384, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.028856931254267693, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.028749272227287292, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.026401421055197716, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.026158152148127556, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017428597435355186, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.017069905996322632, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01689857803285122, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010794337838888168, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10621333122253418, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10621333122253418, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.46.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2665102481842041, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23975370824337006, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22863319516181946, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.2039789855480194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1235332190990448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11255134642124176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14696139097213745, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1344214826822281, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1275789588689804, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10719907283782959, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.10119812935590744, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07474561035633087, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06448028981685638, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.059543970972299576, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05833066999912262, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03776179626584053, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.031016014516353607, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030690236017107964, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02774862013757229, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.026965955272316933, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020472150295972824, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019832847639918327, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01889076828956604, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.01342373713850975, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10719907283782959, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10719907283782959, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.47.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12702886760234833, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1189064085483551, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11561405658721924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10493961721658707, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.059644389897584915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05638721212744713, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06785611063241959, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0626765564084053, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06055734306573868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05359102785587311, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05100059136748314, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03451818600296974, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029956001788377762, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.028591053560376167, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.028262222185730934, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017258258536458015, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014664421789348125, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014532727189362049, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013426436111330986, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013218888081610203, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009012484923005104, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008906138129532337, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008563035167753696, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0057458230294287205, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11561405658721924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11561405658721924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.47.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10915017873048782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10225392878055573, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0992111936211586, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09005407243967056, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.051261503249406815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.048341862857341766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05889764428138733, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05423540994524956, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05205551162362099, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04608197137713432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.043944455683231354, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.02992735616862774, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.025931300595402718, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02457750029861927, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024251842871308327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014958914369344711, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012586873956024647, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012447385117411613, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011523622088134289, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011311933398246765, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007782929111272097, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007639948278665543, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0073465886525809765, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004862383008003235, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10915017873048782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10915017873048782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.47.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.251029908657074, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23528942465782166, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23014889657497406, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20909149944782257, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11827164143323898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11245030164718628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13270574808120728, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12210053205490112, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12002715468406677, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10630008578300476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10110561549663544, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06752729415893555, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05833505094051361, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05658603087067604, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.0561596043407917, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03368839621543884, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028754452243447304, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028615497052669525, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.026258880272507668, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025983627885580063, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017313165590167046, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.016861382871866226, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016632210463285446, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010390729643404484, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10630008578300476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10630008578300476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.47.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.20044519007205963, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.17734481394290924, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16931359469890594, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1417844146490097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09313387423753738, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08470677584409714, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10887753963470459, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.09992124140262604, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09632889181375504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0766388475894928, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06937067955732346, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05618032068014145, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04821325093507767, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04511430487036705, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.044349826872348785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02825293131172657, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02383345179259777, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023601967841386795, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02062871679663658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.020127061754465103, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015439270064234734, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015596354380249977, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014486920088529587, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01120449136942625, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10887753963470459, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.10887753963470459, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.47.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20442451536655426, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.19202657043933868, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.18814103305339813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1711418330669403, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09653988480567932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09200308471918106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10751296579837799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09926032274961472, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09792084246873856, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08691053837537766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0827588364481926, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05484793335199356, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04756307229399681, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04632537439465523, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04603172466158867, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02743847668170929, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.023906908929347992, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.023824824020266533, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.021957585588097572, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.0217665433883667, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014461759477853775, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014545717276632786, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01406160369515419, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009773770347237587, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10751296579837799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10751296579837799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.47.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24805328249931335, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23296894133090973, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22828739881515503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.20777370035648346, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11701278388500214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11151083558797836, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1303047090768814, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12022732943296432, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11866140365600586, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10535445809364319, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10025884956121445, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06637775897979736, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05750291049480438, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.056023575365543365, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05566887930035591, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.0331786572933197, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.0286007858812809, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.028498027473688126, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.026191335171461105, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02597273699939251, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017344102263450623, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01692243106663227, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016866328194737434, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010759728960692883, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10535445809364319, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10535445809364319, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.47.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24067479372024536, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.21323230862617493, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2010907083749771, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.17905689775943756, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1104021891951561, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.09879808127880096, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13332363963127136, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12270496040582657, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11480383574962616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.0947544127702713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.0896381288766861, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06783709675073624, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05870174616575241, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05315447226166725, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.051769841462373734, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.033950116485357285, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02762654423713684, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02724435180425644, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.024497924372553825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.023600110784173012, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.017922131344676018, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.017789142206311226, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01611488126218319, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.011812028475105762, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11480383574962616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11480383574962616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.48.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12457588315010071, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1166890412569046, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11342495679855347, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10297644138336182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05855041742324829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05531388521194458, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06695454567670822, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06163358315825462, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.059429068118333817, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05261845886707306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05026409775018692, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03404361009597778, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029475567862391472, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.028063680976629257, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027722124010324478, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01701602153480053, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014372057281434536, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014234947971999645, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013154963031411171, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012940832413733006, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008872056379914284, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008695353753864765, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008414680138230324, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005552897695451975, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1166890412569046, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1166890412569046, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.48.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1091119721531868, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10219753533601761, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0991138219833374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09002207964658737, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.051193974912166595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04827143996953964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0590125173330307, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05417172238230705, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05197692662477493, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.046011604368686676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0440140925347805, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.029952822253108025, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.025909991934895515, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.024552730843424797, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.024228589609265327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014990908093750477, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012593363411724567, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012453087605535984, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011535730212926865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011329524219036102, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007842635735869408, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007662447169423103, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007414286956191063, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004913629032671452, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1091119721531868, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1091119721531868, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.48.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2503542900085449, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23475083708763123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2296844720840454, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20865368843078613, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11809906363487244, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11227629333734512, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13259932398796082, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12196110934019089, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1198502853512764, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10616994649171829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.1010528951883316, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06755275279283524, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05828779563307762, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.056523460894823074, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05610286444425583, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.033715758472681046, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028839996084570885, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028700804337859154, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.026360217481851578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02609289065003395, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017423123121261597, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017082454636693, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01674409583210945, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010787607170641422, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10616994649171829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10616994649171829, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.48.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.206820547580719, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1753380447626114, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16448010504245758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1358467936515808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09485813975334167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08312906324863434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11287195235490799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1028689593076706, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09899800270795822, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07394599169492722, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0671461969614029, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.058078717440366745, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05035297945141792, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04674220085144043, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04586929455399513, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.029434917494654655, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0260186567902565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02578621357679367, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.022288667038083076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02173255942761898, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016715317964553833, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01860794425010681, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01563126966357231, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.014934329316020012, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11287195235490799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11287195235490799, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.48.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20573575794696808, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.19313828647136688, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1892540454864502, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.17214785516262054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0971190556883812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0925576239824295, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10807199776172638, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.0998430848121643, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09851620346307755, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0874381959438324, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08320505917072296, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05514775216579437, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04782119020819664, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.046585146337747574, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04629033803939819, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.027561262249946594, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024020086973905563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.023937810212373734, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022056013345718384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.021868014708161354, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014493526890873909, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01458222046494484, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014086171053349972, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009765255264937878, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10807199776172638, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10807199776172638, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.48.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2478734403848648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2328297644853592, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22813761234283447, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2075299471616745, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11686176806688309, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11144072562456131, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1300477683544159, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12006670236587524, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11854260414838791, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10524410754442215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10012440383434296, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06616342812776566, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.057405758649110794, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05593705177307129, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05559076368808746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03305787593126297, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02851504273712635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02841431461274624, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.026112787425518036, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.025889072567224503, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01719411090016365, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016809968277812004, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016708042472600937, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010594457387924194, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11686176806688309, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11686176806688309, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.48.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24255691468715668, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2159704715013504, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.20414556562900543, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1817660927772522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1116664782166481, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10037625581026077, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13468316197395325, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12361284345388412, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11585238575935364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09623335301876068, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09093533456325531, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06880175322294235, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.05947783961892128, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.054035864770412445, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05265574902296066, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03491928428411484, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02852819114923477, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.028135821223258972, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02550806850194931, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.024638311937451363, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019163992255926132, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018897246569395065, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017436616122722626, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013301125727593899, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11585238575935364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11585238575935364, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.49.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.13470181822776794, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12620148062705994, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1228492259979248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11152053624391556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06331934779882431, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0599299818277359, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07172642648220062, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06623277813196182, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06427819281816483, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.056882284581661224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05407069995999336, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03640281781554222, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03164468705654144, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.030321085825562477, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.029998548328876495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.018189962953329086, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01549336314201355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.015364381484687328, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.014168589375913143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013961021788418293, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009425248950719833, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009283299557864666, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008984125219285488, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005867584142833948, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11152053624391556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11152053624391556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.49.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11662536859512329, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10927455872297287, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10625768452882767, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09650957584381104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05487176403403282, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05183766782283783, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0625714436173439, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.057629864662885666, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05568302795290947, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04930029809474945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04701901227235794, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03180081769824028, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02754046954214573, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.026279868558049202, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02598116174340248, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015901658684015274, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013440435752272606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013319261372089386, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01229836791753769, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012109755538403988, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00825453270226717, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008093154989182949, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007843410596251488, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005135701969265938, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11662536859512329, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11662536859512329, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.49.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.253821462392807, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.2381042242050171, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23301465809345245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.2117300182580948, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.1196843758225441, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11386088281869888, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1340576857328415, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12341193854808807, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12144149839878082, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10760360211133957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10237444192171097, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06821047514677048, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05897478014230728, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05725758895277977, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05684531107544899, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03403935208916664, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.029111485928297043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02898937277495861, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.026595918461680412, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026339802891016006, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017539866268634796, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017076080664992332, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016891079023480415, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010569263249635696, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10760360211133957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10760360211133957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.49.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.20816579461097717, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1849755495786667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1773700714111328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15133363008499146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0967145785689354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08792813867330551, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11125706136226654, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10249599814414978, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09995582699775696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08062788844108582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07253874838352203, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05712556466460228, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04926920309662819, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.0465407520532608, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04588460922241211, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028600743040442467, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02412179484963417, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023952294141054153, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.0208753552287817, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02043823152780533, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015193702653050423, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015077981166541576, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014312590472400188, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01016742642968893, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11125706136226654, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11125706136226654, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.49.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20827436447143555, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.19552338123321533, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.19153279066085815, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.17429512739181519, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09838080406188965, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09371177852153778, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10949470847845078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10114134848117828, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.0997747853398323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08851103484630585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0841980054974556, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05594554916024208, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04845356568694115, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.047200292348861694, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04689785838127136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.027957383543252945, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02434476837515831, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.024261269718408585, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022345170378684998, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.022153634577989578, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014756280928850174, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014788947999477386, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014343000017106533, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00991593673825264, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10949470847845078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10949470847845078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.49.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24930109083652496, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2341349869966507, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22942182421684265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2086324691772461, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11756395548582077, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11205771565437317, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13077981770038605, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12082814425230026, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11923758685588837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10583982616662979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10060973465442657, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0666051134467125, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05775956064462662, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05627947673201561, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05592837929725647, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.033240657299757004, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.028668999671936035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.028566857799887657, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02623172663152218, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.0260079987347126, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017242860049009323, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.016865305602550507, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016752829775214195, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010581204667687416, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10583982616662979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10583982616662979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.49.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25017353892326355, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.2235020250082016, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21225734055042267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1888071596622467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11522366106510162, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10422826558351517, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13716869056224823, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12629178166389465, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11938256025314331, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09935124963521957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09362104535102844, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06983023136854172, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06039733439683914, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05538241192698479, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05414317175745964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03496317192912102, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02860647812485695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.028279297053813934, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.025401432067155838, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.0245913527905941, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018443604931235313, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018055075779557228, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01680522784590721, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.01176069863140583, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11522366106510162, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11522366106510162, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.50.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.13232311606407166, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12385581433773041, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.12042301148176193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10924620926380157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06222086399793625, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05876205861568451, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0707915723323822, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06534026563167572, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06318622082471848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05584851652383804, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.053127508610486984, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.036003489047288895, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03127894178032875, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.029842741787433624, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02950003370642662, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01802043430507183, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01531867403537035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.015175526961684227, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.014006813056766987, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013787662610411644, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009404025040566921, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009298508055508137, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00893393438309431, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006002585869282484, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10924620926380157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10924620926380157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.50.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11478536576032639, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1074054017663002, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10432573407888412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09461519867181778, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.053936392068862915, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05088083818554878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06180456280708313, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05689343065023422, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05476844683289528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04841570183634758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.046167176216840744, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03140558302402496, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.027198385447263718, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.025852376595139503, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.025528375059366226, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015695743262767792, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013234750367701054, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013101713731884956, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012104720808565617, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01190027967095375, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00816324632614851, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008014398626983166, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.0077207875438034534, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005112797953188419, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11478536576032639, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11478536576032639, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.50.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2529647648334503, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23705478012561798, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2317558228969574, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.21043476462364197, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11917684227228165, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11327246576547623, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13377481698989868, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12308182567358017, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12095494568347931, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10697323828935623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10174866020679474, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06803888827562332, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05880168825387955, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05700819194316864, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05657930672168732, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0339520163834095, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02894924208521843, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028814522549510002, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02641366422176361, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026131195947527885, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01743980683386326, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01695922575891018, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016759170219302177, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010414905846118927, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10697323828935623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10697323828935623, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.50.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21620047092437744, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1751953810453415, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16089540719985962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1279197484254837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09370726346969604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07891693711280823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1214236170053482, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10725919902324677, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10066049546003342, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07254946976900101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06756992638111115, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06084829568862915, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05333879590034485, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.047280699014663696, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04580145701766014, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03170140087604523, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0278073288500309, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.027436941862106323, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.024211078882217407, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.023340394720435143, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.019066056236624718, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.021491754800081253, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01737034320831299, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.018112896010279655, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10725919902324677, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10725919902324677, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.50.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.21243874728679657, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1994485855102539, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1953476518392563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1776294708251953, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10041456669569016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09565208852291107, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11189068108797073, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10327646136283875, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10184673219919205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09032975882291794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08586869388818741, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05713704228401184, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04954266920685768, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.048244040459394455, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0479436032474041, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02864484302699566, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024984098970890045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.024902382865548134, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022947261109948158, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02275320142507553, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015245812013745308, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01532739121466875, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014824410900473595, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01045834831893444, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11189068108797073, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11189068108797073, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.50.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2522159814834595, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23678897321224213, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.2319716513156891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.21098966896533966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.1190173476934433, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11338858306407928, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13269203901290894, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1223427876830101, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.12073065340518951, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10710936784744263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10185997188091278, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06757470965385437, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.058556586503982544, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.057043831795454025, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.056689728051424026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.033859752118587494, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.029194237664341927, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02909579873085022, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02673174813389778, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.026507286354899406, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01784081757068634, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.017379190772771835, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.017350181937217712, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011187903583049774, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10710936784744263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10710936784744263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.50.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2512156069278717, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22513312101364136, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21393662691116333, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1903693974018097, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1159883588552475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10511942207813263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.137856587767601, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12716548144817352, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12003568559885025, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10033755004405975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09452411532402039, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07033104449510574, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06106356903910637, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05596163123846054, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05470234528183937, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.035300012677907944, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029298702254891396, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.028947902843356133, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026197778061032295, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025389347225427628, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.018721209838986397, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.018998045474290848, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017044931650161743, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013057710602879524, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1159883588552475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1159883588552475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.51.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.13707400858402252, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12834036350250244, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1250404417514801, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11349660903215408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06450816988945007, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.061058636754751205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07303857803344727, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0674515888094902, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06548968702554703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05791129916906357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05505211278796196, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03707592189311981, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.0322275310754776, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03089187666773796, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.030574573203921318, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.018529150635004044, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.015767743811011314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01564488746225834, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01440162118524313, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014198103919625282, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009572607465088367, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009418364614248276, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.009125067852437496, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005915394052863121, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11349660903215408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11349660903215408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.51.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12012387067079544, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11252732574939728, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10949516296386719, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09938671439886093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05655061826109886, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0534742996096611, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0643971785902977, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05932347849011421, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05742597579956055, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.050801534205675125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04840989410877228, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03272267431020737, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028353143483400345, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.027093352749943733, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02679331973195076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016359299421310425, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013845376670360565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013725046068429947, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012658721767365932, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012469680048525333, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008483214303851128, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00831079576164484, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00806885864585638, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005249470937997103, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11252732574939728, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11252732574939728, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.51.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2581726610660553, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.24205657839775085, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23687514662742615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.21518345177173615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.12182575464248657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11586212366819382, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13641391694545746, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1256432682275772, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12363189458847046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.1094878762960434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10410542786121368, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06938152015209198, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.06005001813173294, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05827716365456581, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05786057561635971, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03462880104780197, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.029598750174045563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02946982905268669, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.027022257447242737, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026750603690743446, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01779239811003208, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017324330285191536, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01713481731712818, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010651880875229836, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.1094878762960434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.1094878762960434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.51.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.22741994261741638, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18488453328609467, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.17072694003582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.14960971474647522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10458243638277054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08761165291070938, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12278196215629578, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11281248927116394, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1093738004565239, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07959236949682236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07530496269464493, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06319858878850937, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05433905869722366, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05058874189853668, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.049657709896564484, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03175154700875282, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026571346446871758, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026352446526288986, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.021685201674699783, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.021041778847575188, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.0172481220215559, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017227651551365852, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016053874045610428, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.012100369669497013, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11281248927116394, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11281248927116394, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.51.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.21624860167503357, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.20292161405086517, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.19875389337539673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.18073046207427979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10218265652656555, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09735520929098129, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11397513747215271, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10510586202144623, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1036895364522934, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09187588840723038, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08749192953109741, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05822952091693878, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05037930607795715, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.049044251441955566, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04872887209057808, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.029147757217288017, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.025337379425764084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02524857595562935, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.023248955607414246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.023044588044285774, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01547024492174387, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.0154474638402462, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.015027064830064774, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010420341975986958, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11397513747215271, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11397513747215271, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.51.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2546873986721039, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23902051150798798, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.23411299288272858, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.21281690895557404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.12016778439283371, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1144862249493599, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1338036060333252, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12357954680919647, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.12193404883146286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10806678235530853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10268127173185349, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06813201308250427, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.059083059430122375, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05753610283136368, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05716071277856827, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03402571380138397, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.029318131506443024, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02921702153980732, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.026809876784682274, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.026573825627565384, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01767292618751526, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.017277153208851814, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01716294325888157, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010865225456655025, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10806678235530853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10806678235530853, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.51.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25720974802970886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23055899143218994, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21932931244373322, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.1951066553592682, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11886678636074066, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.1078045591711998, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14150875806808472, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12992705404758453, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12291516363620758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10275286436080933, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09677146375179291, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07216054946184158, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06236366927623749, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05731101334095001, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.056077808141708374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.036396171897649765, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.02995590679347515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.02961854636669159, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026756633073091507, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02595152147114277, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01965046301484108, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019327757880091667, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.0179943535476923, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013210620731115341, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10275286436080933, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10275286436080933, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.52.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.14262178540229797, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1334952861070633, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.13000686466693878, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11802872270345688, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06713642179965973, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.06349638849496841, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07621652632951736, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0701950341463089, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06817031651735306, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.06026412174105644, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05733582749962807, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03873542696237564, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03355444595217705, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03216118365526199, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.03183302283287048, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.019370248541235924, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.016441315412521362, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.016313092783093452, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.015021235682070255, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014809433370828629, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.010078363120555878, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009858611971139908, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.009598972275853157, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006253785453736782, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07621652632951736, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07621652632951736, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.52.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1245831847190857, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11659975349903107, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11343839764595032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1029784232378006, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.058699607849121094, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05544974282383919, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06687577813863754, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06155403330922127, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0595860555768013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05268078297376633, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05021098628640175, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0340423658490181, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029440276324748993, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02811705321073532, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02781197801232338, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01701042428612709, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014382489025592804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014255700632929802, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013142630457878113, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012944902293384075, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008838425390422344, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008649798110127449, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008402341976761818, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005489617120474577, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11659975349903107, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11659975349903107, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.52.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2594822943210602, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.24310415983200073, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23781640827655792, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.21596087515354156, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.12241027504205704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11633248627185822, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13704273104667664, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12624011933803558, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12423597276210785, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.109832763671875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10437063127756119, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06973499804735184, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.060322731733322144, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05855508893728256, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05813474953174591, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03479953110218048, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.029772410169243813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.029641348868608475, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02714790776371956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02687986195087433, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017909707501530647, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01745784468948841, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01725511997938156, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010790027678012848, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.109832763671875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.109832763671875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.52.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.23104503750801086, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.19709444046020508, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.185089573264122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15453630685806274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10577040910720825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.09296070784330368, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12592461705207825, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1157696396112442, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11116135865449905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08503776043653488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07586728781461716, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.064760722219944, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.055712420493364334, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.051150448620319366, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.050030022859573364, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.032436009496450424, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02690499648451805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.026605866849422455, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.022840866819024086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.022087443619966507, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01746981404721737, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017596526071429253, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01601891592144966, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01233304850757122, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1157696396112442, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1157696396112442, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.52.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.21319103240966797, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2000301629304886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.19591835141181946, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.17802363634109497, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10083000361919403, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.0960187092423439, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11229640990495682, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10370397567749023, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10230804979801178, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0906013622879982, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08606785535812378, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0573609359562397, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.049709588289260864, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.048391830176115036, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04808109626173973, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02868185006082058, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024962985888123512, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02487623691558838, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022885747253894806, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02268899790942669, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015115306712687016, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015179269015789032, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014682114124298096, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010178395546972752, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11229640990495682, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11229640990495682, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.52.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.25584059953689575, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.24006013572216034, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.23504388332366943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.21357586979866028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.12082021683454514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11504083126783371, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13465414941310883, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1242360919713974, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1225857213139534, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10855895280838013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10318120568990707, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.0686991810798645, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.059456393122673035, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.057893577963113785, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05752120912075043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03434184938669205, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02962167002260685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02951505035161972, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02709135226905346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.026856228709220886, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01800190471112728, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.017620712518692017, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.017490211874246597, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011325950734317303, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10855895280838013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10855895280838013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.52.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2622251510620117, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23534460365772247, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22406575083732605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19932661950588226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12134436517953873, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11026357859373093, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14338284730911255, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13240379095077515, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12535366415977478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10492774844169617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09868551045656204, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07308920472860336, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06354203075170517, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05850142985582352, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.057274553924798965, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03666626662015915, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03055410459637642, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030220195651054382, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02729002758860588, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.026491565629839897, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019475853070616722, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01966298744082451, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01787153072655201, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.01341482438147068, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10492774844169617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10492774844169617, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.53.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1397346705198288, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.13072863221168518, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1272323578596115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11537408828735352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06578541547060013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.062197424471378326, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07458138465881348, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06888719648122787, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06681209802627563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.058974623680114746, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05601000040769577, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.037869494408369064, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.032931506633758545, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03152114897966385, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.031185301020741463, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.018930602818727493, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.016108112409710884, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01597718521952629, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.014705172739923, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014488178305327892, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009831023402512074, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009669119492173195, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00936161819845438, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006126756314188242, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11537408828735352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11537408828735352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.53.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12188594788312912, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11403302848339081, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11089295893907547, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10055817663669586, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.057372935116291046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05420193821191788, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06542383879423141, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06026065722107887, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0582532174885273, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.051451459527015686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.048980314284563065, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.033262304961681366, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028814321383833885, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02751438319683075, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027197647839784622, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016629043966531754, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014086656272411346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013957084156572819, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012866489589214325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012662931345403194, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008652936667203903, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00849362276494503, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008223054930567741, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005413061939179897, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11403302848339081, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11403302848339081, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.53.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.26076215505599976, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.24406428635120392, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23871299624443054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.21659372746944427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.12290789186954498, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11680727452039719, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13767483830451965, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12680506706237793, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12478985637426376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11024608463048935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10474444925785065, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0700274333357811, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.06057709455490112, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05880969390273094, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.058374594897031784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.034933678805828094, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.029854269698262215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.029731592163443565, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.027217531576752663, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026942269876599312, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01794774830341339, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01745435781776905, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017283635213971138, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010699789971113205, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11024608463048935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11024608463048935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.53.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21001039445400238, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.17395880818367004, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1578987091779709, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.13469436764717102, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09710047394037247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.080281563103199, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12045373022556305, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11030498892068863, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10093419253826141, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07472947239875793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06937805563211441, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06194537132978439, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05306927114725113, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04699043184518814, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04546601325273514, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.030995234847068787, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.02475307695567608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.024134095758199692, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02040776051580906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01935402676463127, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016518065705895424, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01656940206885338, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014508227817714214, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.011458068154752254, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11030498892068863, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11030498892068863, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.53.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.21389421820640564, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.20049677789211273, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.19635018706321716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.17822034657001495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.1010926216840744, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09622538089752197, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11276449263095856, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1040758267045021, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1026337519288063, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09078789502382278, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0862589180469513, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.057607561349868774, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04988643899559975, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04854664206504822, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.048222336918115616, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.028841182589530945, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.025072569027543068, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02498128078877926, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022981537505984306, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.022773319855332375, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015295157209038734, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015289868228137493, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01485058106482029, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01029954943805933, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11276449263095856, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11276449263095856, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.53.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.257414847612381, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.24128036201000214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.23626302182674408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.21454672515392303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.12151647359132767, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11568327993154526, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13534529507160187, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1250053197145462, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.12334807217121124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.109135203063488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.1036488488316536, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06898250430822372, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05979140102863312, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05818884074687958, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.057810451835393906, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03442898020148277, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.029682502150535583, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.029569728299975395, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02711162529885769, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02686464972794056, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01788805052638054, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.017527155578136444, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.017357174307107925, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011061199940741062, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.109135203063488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.109135203063488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.53.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2697322964668274, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.24250027537345886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2313113957643509, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.20559778809547424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12491818517446518, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11378884315490723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14836248755455017, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1358095407485962, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.129008486866951, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10810799896717072, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.1016988456249237, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07533404231071472, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06517553329467773, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.06021871417760849, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05900665372610092, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.037998639047145844, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03139840438961983, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.031077846884727478, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.02803940139710903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.0272553451359272, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020525585860013962, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020100068300962448, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01892673783004284, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.01366782933473587, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10810799896717072, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10810799896717072, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.54.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.14456401765346527, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.13510556519031525, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.13150180876255035, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11913833022117615, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06805197894573212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.06427940726280212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07722441107034683, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.07130636274814606, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06909330189228058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.06090744212269783, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.057825006544589996, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03923017904162407, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03405889868736267, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03260023891925812, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.03224066272377968, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.019602995365858078, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.016665298491716385, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.016529498621821404, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.015200430527329445, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014973131008446217, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.010160326026380062, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.010010340251028538, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.009664220735430717, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006353636272251606, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07722441107034683, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07722441107034683, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.54.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12475873529911041, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1166403517127037, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11336714774370193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10268998891115189, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.058740828186273575, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.055432625114917755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0671592652797699, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06179763376712799, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05965853109955788, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05263874679803848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.050091806799173355, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03414544463157654, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029550958424806595, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.028168393298983574, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02783692628145218, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017068125307559967, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014419885352253914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014282980933785439, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013163082301616669, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012951324693858624, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008877934888005257, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008710253052413464, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008421096950769424, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005549090914428234, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1166403517127037, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1166403517127037, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.54.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2668291926383972, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.24978229403495789, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2443002164363861, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.221503347158432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.12594644725322723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11962971836328506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.14105306565761566, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12994766235351562, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12786628305912018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.112862728536129, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10713370889425278, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.07181620597839355, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.06210291385650635, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.06025673449039459, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05982193723320961, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03583187982439995, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.030624503269791603, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.030487900599837303, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.027908513322472572, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.027623482048511505, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01842505857348442, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017958199605345726, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017734693363308907, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.011084389872848988, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.112862728536129, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.112862728536129, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.54.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.21717266738414764, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18052399158477783, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16613300144672394, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.13239948451519012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09791282564401627, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08337578177452087, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12102664262056351, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11096210032701492, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1044035404920578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07505418360233307, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06818872690200806, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06208242103457451, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05327339842915535, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04728413000702858, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.045790065079927444, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031090153381228447, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.024735018610954285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02429434284567833, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020246362313628197, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019175564870238304, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.016610780730843544, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01632596179842949, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014697417616844177, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.011003817431628704, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11096210032701492, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11096210032701492, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.54.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2123660296201706, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.19910015165805817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.19487924873828888, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1769580841064453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10047459602355957, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09559743851423264, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11202026903629303, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10345469415187836, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.1019773855805397, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09019818156957626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08569137006998062, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05725700408220291, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.049612268805503845, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.048266202211380005, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04794209077954292, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02864365465939045, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024948827922344208, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.024855254217982292, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022861870005726814, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.022657480090856552, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015154282562434673, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015243146568536758, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014713741838932037, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010303742252290249, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11202026903629303, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11202026903629303, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.54.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.25660908222198486, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.24052958190441132, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.23550187051296234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.21389038860797882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.12125232815742493, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11536018550395966, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13505876064300537, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12473127990961075, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.12303534895181656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10885822772979736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10332580655813217, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06886004656553268, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05969102680683136, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05809207260608673, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05771810933947563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03439122810959816, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.029679670929908752, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.029567835852503777, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.027121108025312424, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.026874735951423645, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01788170635700226, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01760214753448963, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.017344240099191666, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01122956071048975, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10885822772979736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10885822772979736, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.54.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2667827904224396, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23928456008434296, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22791200876235962, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.20255568623542786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12336227297782898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11211441457271576, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14643096923828125, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13451506197452545, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12763839960098267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10667155683040619, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.10039862245321274, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07483337819576263, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06456117331981659, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.059415657073259354, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05816313624382019, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.037721168249845505, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.030901828780770302, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030583318322896957, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.027558868750929832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.026748843491077423, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.020397184416651726, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019719019532203674, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018740259110927582, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013247505761682987, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10667155683040619, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10667155683040619, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.55.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1416080743074417, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.13239476084709167, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1286870390176773, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11655725538730621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06669170409440994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0629318356513977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07591447979211807, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.07003528624773026, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06772564351558685, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.059708308428525925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.056743066757917404, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.038581520318984985, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03350425139069557, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03196348622441292, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.03159625455737114, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01929306983947754, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.016348185017704964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.016199536621570587, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.014910240657627583, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014676625840365887, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.010019748471677303, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009845473803579807, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.009514918550848961, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006255975924432278, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11655725538730621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11655725538730621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.55.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12349314987659454, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1154397502541542, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11215745657682419, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10160895437002182, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05815218761563301, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05484835058450699, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06635726988315582, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06114022806286812, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05904311314225197, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05208763852715492, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04952980950474739, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.033745426684617996, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02924516052007675, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02787788212299347, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02755507454276085, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016864648088812828, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014270682819187641, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014135051518678665, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.013022058643400669, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012813720852136612, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008767204359173775, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008623514324426651, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008320922031998634, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0054947673343122005, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1154397502541542, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1154397502541542, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.55.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.26224273443222046, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.24558205902576447, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.2400890290737152, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.2176317274570465, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.12382738292217255, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11757321655750275, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.1387067437171936, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12782560288906097, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12568944692611694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.1109321117401123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10526321083307266, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.07050259411334991, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.061104293912649155, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05924985185265541, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05880391597747803, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03521482273936272, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.030079813674092293, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02994624152779579, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.027395369485020638, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02710770070552826, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.018073279410600662, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017596594989299774, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.017389623448252678, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0107821524143219, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.1109321117401123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.1109321117401123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.55.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2185012400150299, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.18493610620498657, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.17308302223682404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.15083537995815277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.10102233290672302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08739671111106873, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.12045150250196457, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10911858826875687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.10456743836402893, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.08018594235181808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.07487226277589798, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06170591339468956, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05288809910416603, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04919581115245819, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04828803241252899, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.031252678483724594, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.026458067819476128, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02617204561829567, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.022494539618492126, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.021919865161180496, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01761297695338726, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017902890220284462, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016512831673026085, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0134509839117527, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10911858826875687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10911858826875687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.55.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.21088184416294098, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1976059079170227, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.19341787695884705, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.17553271353244781, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0998111143708229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09489090740680695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11121635138988495, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10274320840835571, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10129835456609726, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08951395004987717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08493904769420624, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05683344602584839, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.0492841899394989, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04795026779174805, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04763646423816681, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.028434783220291138, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024817628785967827, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.024729391559958458, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02273019775748253, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02253037318587303, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015028941445052624, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015205005183815956, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014594280160963535, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010331777855753899, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11121635138988495, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11121635138988495, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.55.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.25556501746177673, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.2394169420003891, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.23437051475048065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.21281538903713226, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.12080856412649155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.1149405837059021, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13471980392932892, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12432616949081421, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.12263479083776474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10838373005390167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.10297814011573792, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06873208284378052, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05953498184680939, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05793461576104164, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05755609646439552, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03437696769833565, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02969665266573429, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.029591966420412064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.027141695842146873, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.026895267888903618, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01805008575320244, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.017763320356607437, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01752663590013981, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011543209664523602, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10838373005390167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10838373005390167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.55.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.26811346411705017, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.24020905792713165, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2282889187335968, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.20282989740371704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.124046690762043, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11237788945436478, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.1481146365404129, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1359826624393463, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1283564269542694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10716164857149124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.10079587996006012, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07552474737167358, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06526906788349152, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.059774793684482574, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05842147767543793, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.038154326379299164, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.031144946813583374, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030762141570448875, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.027775807306170464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02689414657652378, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.02071305550634861, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.020005974918603897, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018980899825692177, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013479433953762054, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10716164857149124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10716164857149124, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.56.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.14263984560966492, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.13322307169437408, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1294112503528595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11716938018798828, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06715040653944016, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.06333494186401367, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07646027207374573, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.07063452154397964, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06823957711458206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.06005805730819702, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05701320618391037, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03885350003838539, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03376868739724159, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03220377862453461, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.031826313585042953, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01941407471895218, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01648927666246891, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.016338035464286804, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01502799242734909, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014788627624511719, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.010099277831614017, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009966419078409672, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.009585939347743988, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006361985579133034, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07646027207374573, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07646027207374573, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.56.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1222173348069191, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11414922028779984, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11072517186403275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10022895038127899, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05752101168036461, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05414698272943497, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06593795865774155, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.060775578022003174, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05843132361769676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0514678992331028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04894522950053215, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.033534158021211624, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.029078884050250053, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.027600225061178207, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027237603440880775, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01677907630801201, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014154931530356407, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013997705653309822, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012911886908113956, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01268179900944233, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00874967873096466, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.008610662072896957, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008266562595963478, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.0055237459018826485, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11414922028779984, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11414922028779984, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.56.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2656455636024475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.24847926199436188, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.24288113415241241, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.22020426392555237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.1254357397556305, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.1190154030919075, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.14062590897083282, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.1295267790555954, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.1273781806230545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11222665011882782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10653842985630035, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.07154422998428345, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.061898596584796906, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.060002945363521576, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.059566207230091095, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03568534553050995, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.030477195978164673, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.030334139242768288, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.027732720598578453, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.027440713718533516, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01833091862499714, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01785147935152054, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01762194186449051, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010965226218104362, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11222665011882782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11222665011882782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.56.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.197730153799057, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.16461655497550964, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15091490745544434, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12490973621606827, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.08889532834291458, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07578853517770767, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11108533293008804, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10222029685974121, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0947457030415535, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0701228454709053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0648309588432312, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.056855879724025726, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04900147020816803, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04290906712412834, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04135989025235176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028437605127692223, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.022434396669268608, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.021922225132584572, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.018738361075520515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01766415871679783, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015048759058117867, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.014908959157764912, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.013057985343039036, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.009954242967069149, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11108533293008804, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11108533293008804, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.56.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.21084459125995636, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.19745241105556488, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1932603120803833, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1753941774368286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09973642230033875, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09483590722084045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11127683520317078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10275927931070328, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10127483308315277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08946558088064194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08496824651956558, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05694340541958809, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04930765926837921, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.0479426272213459, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04761790484189987, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.028501922264695168, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024812687188386917, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.024723591282963753, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022730844095349312, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.022526996210217476, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015136808156967163, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015213369391858578, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014693045988678932, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010341942310333252, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11127683520317078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11127683520317078, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.56.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24830827116966248, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.23267342150211334, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22772319614887238, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2066618800163269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11735688149929047, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.11158517003059387, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.13102923333644867, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.12081799656152725, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11913073807954788, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.1052437350153923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0999269038438797, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06687051057815552, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05783906579017639, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05624808371067047, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05587330460548401, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03342454135417938, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.028778623789548874, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02867622673511505, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.026287607848644257, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02604428119957447, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.017521677538752556, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01713492162525654, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.0170031376183033, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010998894460499287, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.1052437350153923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.1052437350153923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.56.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2664247453212738, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23810945451259613, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.22631734609603882, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.20090612769126892, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.12301322817802429, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11137537658214569, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14643682539463043, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.13483142852783203, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.1275128275156021, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.106119304895401, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09976531565189362, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07468327134847641, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06463676691055298, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05925387889146805, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.057923346757888794, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.037414729595184326, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.03081280179321766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030470428988337517, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.027436358854174614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.026573238894343376, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.0197952538728714, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.01971018686890602, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.018039144575595856, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013201351277530193, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.106119304895401, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.106119304895401, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.57.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1400589942932129, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.13081331551074982, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.12709808349609375, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11504978686571121, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06593286246061325, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.062148552387952805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07504314184188843, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06935485452413559, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06696753203868866, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05896700546145439, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0559234656393528, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0381242111325264, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.033159442245960236, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.031603358685970306, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.031235430389642715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.019061801955103874, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.016189541667699814, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01603863202035427, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.014762344770133495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01451939344406128, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00992305763065815, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009795297868549824, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.00941510684788227, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006259450223296881, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11504978686571121, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11504978686571121, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.57.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1190100684762001, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11116573214530945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10784610360860825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09766320139169693, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05601810663938522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.052740130573511124, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06414906680583954, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05915968120098114, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.056918006390333176, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0501275435090065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.047648411244153976, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03263210132718086, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.028284844011068344, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02686779573559761, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.026524782180786133, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016327183693647385, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013790289871394634, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.013647584244608879, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012581171467900276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.012364364229142666, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008527154102921486, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00839240849018097, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008067325688898563, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005400346592068672, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11116573214530945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11116573214530945, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.57.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.26486465334892273, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.24770252406597137, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.24212855100631714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.21951128542423248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.1250527799129486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11869273334741592, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.14016762375831604, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12915144860744476, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12698549032211304, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11196931451559067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10620148479938507, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.07126445323228836, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.06173274293541908, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05983918160200119, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05938516557216644, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.0355791375041008, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.030409542843699455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.03026459738612175, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02767244167625904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02738175541162491, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01827326789498329, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01780902035534382, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01758047565817833, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.01096752192825079, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11196931451559067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11196931451559067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.57.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.20691460371017456, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1752149611711502, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.16485446691513062, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1338736116886139, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0957600399851799, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.08318360149860382, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11130723357200623, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10226075351238251, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.09928762167692184, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.07296519726514816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06683403253555298, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.05708838626742363, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04913151636719704, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.046156130731105804, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.045436229556798935, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028553757816553116, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.0240632351487875, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.02386978082358837, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.019563870504498482, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019048983231186867, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015226058661937714, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015284454450011253, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014260189607739449, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010514703579246998, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11130723357200623, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11130723357200623, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.57.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.2094099372625351, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.19615031778812408, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1918734908103943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1741069257259369, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09908466041088104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09421102702617645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11045178771018982, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10205687582492828, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10059812664985657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08884451538324356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08419416844844818, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.056365858763456345, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04893631860613823, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04758686572313309, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04726425185799599, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02819731831550598, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024572286754846573, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.024477897211909294, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02248561382293701, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.022286968305706978, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.014842541888356209, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01497699599713087, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014401178807020187, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.01007930189371109, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11045178771018982, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11045178771018982, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.57.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.24235981702804565, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.22701145708560944, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.22220423817634583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.2016363888978958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.11454237997531891, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.10892704129219055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.12765060365200043, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.1178964301943779, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11626393347978592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.10268735885620117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0973869264125824, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06509055942296982, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.05641256645321846, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05488193780183792, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.05451595038175583, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.03249884396791458, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02802555076777935, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.027922337874770164, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.025573739781975746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.025339893996715546, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01688515394926071, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01660761795938015, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.016370559111237526, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010544068180024624, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11626393347978592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11626393347978592, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.57.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.2684134244918823, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.23870421946048737, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.2264866828918457, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.20102927088737488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.1235557347536087, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.11155558377504349, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14748725295066833, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.135713130235672, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12852267920970917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10632527619600296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09996151179075241, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0751475989818573, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.06500586867332458, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05947861447930336, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05810728669166565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.03763015568256378, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.030832603573799133, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.030508236959576607, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.027366481721401215, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.02648099511861801, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01986360363662243, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019623998552560806, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01806066744029522, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.012960974127054214, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10632527619600296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.10632527619600296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.58.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1356622874736786, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12651430070400238, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.12264570593833923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11088314652442932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.0637500211596489, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05997995287179947, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.0729595348238945, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0674443319439888, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06480222195386887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05694136768579483, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.05404810607433319, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.037081457674503326, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.032245997339487076, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.030580716207623482, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.030180267989635468, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01855081133544445, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01569201983511448, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.015524730086326599, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.014295628294348717, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01403712946921587, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009678199887275696, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009559934027493, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.009147021919488907, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00614024605602026, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11088314652442932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.11088314652442932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.58.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11272723972797394, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10519319027662277, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10179565846920013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09199653565883636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05295959860086441, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.049738865345716476, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06100168079137802, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05631951615214348, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05382828041911125, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04734322428703308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04494307190179825, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0309893861413002, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.026933355256915092, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.025411583483219147, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.025044241920113564, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01551002636551857, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01305125467479229, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012893189676105976, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011901273392140865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011667795479297638, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008109119720757008, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007997768931090832, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007626445963978767, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005162535235285759, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11272723972797394, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11272723972797394, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.58.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.26117703318595886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.24417005479335785, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23865161836147308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.21618062257766724, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.12328222393989563, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11695893853902817, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13844572007656097, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12743166089057922, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12522564828395844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11028261482715607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10462193191051483, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.07050581276416779, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.060907892882823944, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05902942642569542, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05857818201184273, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03518266975879669, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.030035272240638733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.029893307015299797, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02733866311609745, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.027046579867601395, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01811596192419529, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017685968428850174, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01740195043385029, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010982356034219265, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11028261482715607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.11028261482715607, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.58.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2046307623386383, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.16904106736183167, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.15671111643314362, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.12950019538402557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.09284905344247818, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.07951164245605469, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11169036477804184, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.10233597457408905, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0985240563750267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.072522833943367, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.06482964754104614, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0574621856212616, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.04930480569601059, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.04501745104789734, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.04395366832613945, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.028888164088129997, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.023908797651529312, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.023676032200455666, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.020010007545351982, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.019302546977996826, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.015694325789809227, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015989668667316437, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.014334375970065594, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.011524014174938202, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11169036477804184, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.11169036477804184, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.58.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.20101046562194824, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.18811438977718353, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1840287297964096, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1668870747089386, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.09511974453926086, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09036542475223541, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1060885414481163, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09798870235681534, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09656784683465958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.08519915491342545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08074773848056793, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.054211489856243134, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.04701212793588638, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04568551480770111, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04537879675626755, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.027134202420711517, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02362544648349285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.023536380380392075, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.021617410704493523, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02141834981739521, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01432607602328062, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014451783150434494, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.013895331881940365, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.00977457594126463, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1060885414481163, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1060885414481163, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.58.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.22925697267055511, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.21460068225860596, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.210030198097229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.19044475257396698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10837076604366302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.102998748421669, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.1208941638469696, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11157210171222687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.11002672463655472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.09709769487380981, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.09208092838525772, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.06167198717594147, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.053406812250614166, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.05193634331226349, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.051588274538517, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.030810566619038582, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.026561062783002853, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.026461917906999588, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.024234803393483162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.024009352549910545, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.016087500378489494, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01579985022544861, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01559958141297102, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.010125787928700447, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11157210171222687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.11157210171222687, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.58.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.25758326053619385, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.22850506007671356, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.21675212681293488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.19219891726970673, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11871565878391266, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10704164952039719, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.14223860204219818, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.1301635503768921, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.12346495687961578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.1018112450838089, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09572882950305939, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.07251215726137161, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.062484923750162125, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.05730422958731651, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.056029871106147766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.036410555243492126, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029989035800099373, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.029690617695450783, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026636837050318718, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025806989520788193, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.019540101289749146, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019430534914135933, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017837945371866226, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.01333352830260992, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.1018112450838089, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.1018112450838089, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.59.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12745346128940582, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11872397363185883, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11499328166246414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.10386808961629868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.059910163283348083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.056221794337034225, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06896370649337769, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06358666718006134, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06089162454009056, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.053438447415828705, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.050786182284355164, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.035071052610874176, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.030445735901594162, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.028771957382559776, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.028371864929795265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.017575113102793694, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.014848343096673489, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014677703380584717, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01353561319410801, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.013278612866997719, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009245841763913631, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.009185120463371277, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008712095208466053, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.006065635476261377, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11499328166246414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11499328166246414, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.59.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10884701460599899, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10144404321908951, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.0979650691151619, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0884963795542717, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.051144104450941086, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.047863829880952835, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05932058021426201, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.05469491332769394, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.051965270191431046, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04565258324146271, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04337484389543533, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03016672283411026, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02619718387722969, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.024560853838920593, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02416818216443062, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.015109725296497345, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012663498520851135, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.012489786371588707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011548605747520924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.011300251819193363, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.007950165309011936, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007853507064282894, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007426355965435505, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005143222399055958, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10884701460599899, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10884701460599899, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.59.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.25002726912498474, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.23346871137619019, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22790992259979248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.20606714487075806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11799436062574387, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11178512126207352, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13279543817043304, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12225295603275299, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11984607577323914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10540596395730972, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.0998755693435669, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06760039180517197, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.05843856930732727, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05649862810969353, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.05603587627410889, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03374888002872467, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028788521885871887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028638159856200218, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.026169825345277786, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.025866711512207985, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.01740562729537487, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017017653211951256, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016699492931365967, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010616999119520187, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10540596395730972, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10540596395730972, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.59.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.13519665598869324, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11466445028781891, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10808970034122467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.0896863266825676, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.060668520629405975, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.05358459800481796, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.07277936488389969, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0663607120513916, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06450842320919037, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.048719003796577454, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04449315741658211, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.03732507675886154, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03267218917608261, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.030125949531793594, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.029507234692573547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01901094801723957, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.017135566100478172, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.017039787024259567, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.015077194198966026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.014713311567902565, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.010942548513412476, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.012663408182561398, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.010195466689765453, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010439547710120678, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11466445028781891, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.11466445028781891, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.59.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.19103382527828217, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1787058264017105, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.17486868798732758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.15853053331375122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.0903763473033905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.08586004376411438, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10082805156707764, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.09311002492904663, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.09175588190555573, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0809287428855896, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.07675451785326004, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.05158361420035362, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.044727202504873276, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.04345385357737541, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04315170273184776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.025835903361439705, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.022562211379408836, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.022477298974990845, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.02066551148891449, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.02047446370124817, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.01373005285859108, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.013937299139797688, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.013311530463397503, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.0095905726775527, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10082805156707764, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.10082805156707764, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.59.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.21201901137828827, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1983875185251236, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1941315084695816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.17601363360881805, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.10020875185728073, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.09522311389446259, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11189267039299011, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.10323739796876907, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.10174014419317245, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.0897289365530014, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.08516518771648407, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.057146601378917694, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.049448102712631226, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.048061247915029526, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.04773636534810066, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02857796661555767, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.024666426703333855, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.024570807814598083, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.022523071616888046, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.022309662774205208, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.015039119869470596, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.01480733323842287, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.014581173658370972, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009662330150604248, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11189267039299011, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.11189267039299011, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.59.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.24555645883083344, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.21641048789024353, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.20487192273139954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.18152277171611786, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11307211220264435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.10135176032781601, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.13598868250846863, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.12410741299390793, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.11785879731178284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.09648364782333374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.09080249816179276, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.06959854066371918, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.059863027185201645, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.054852478206157684, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.05362209305167198, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.0351514108479023, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.029218051582574844, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.028969721868634224, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.026027832180261612, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.025246277451515198, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.01934538409113884, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.019569937139749527, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.017771422863006592, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.014215856790542603, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11307211220264435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.11307211220264435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.60.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.12406093627214432, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1157752051949501, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.11209134757518768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1012532114982605, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05833248421549797, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.054807018488645554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.067470483481884, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.0620446503162384, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.05929335579276085, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.05213487148284912, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.049653828144073486, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.0343274287879467, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02970888651907444, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.028037285432219505, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.027638878673315048, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.01722375676035881, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.01447652094066143, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.014298861846327782, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.01321769505739212, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01295995619148016, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.009099706076085567, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00896617490798235, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008579540066421032, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.005914626177400351, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1157752051949501, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.1157752051949501, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.60.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10394363105297089, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.09703008085489273, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09362185746431351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08455930650234222, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04883235692977905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04571656510233879, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05679389089345932, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.052398599684238434, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04963136464357376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.043653372675180435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04147733375430107, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.028873497620224953, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02508373185992241, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.02346109040081501, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.023069947957992554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.014473899267613888, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.012115711346268654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.011938023380935192, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.011064399965107441, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.010815087705850601, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.00764465844258666, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.007545821834355593, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.007135680876672268, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004955119453370571, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10394363105297089, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.10394363105297089, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.60.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.2543550133705139, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.237669438123703, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.23221328854560852, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.21008281409740448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.12013223022222519, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.11389482766389847, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13497380912303925, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.12416589260101318, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.12200877815485, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10728312283754349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.10171721875667572, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06870328634977341, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.059380825608968735, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05752149224281311, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.057075195014476776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.034294385462999344, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.029295263811945915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.029154708608984947, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.02663344144821167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.026348479092121124, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017672959715127945, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.01730029471218586, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.016971150413155556, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010809680446982384, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10728312283754349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10728312283754349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.60.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.1543765366077423, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.12556329369544983, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.1169433742761612, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09464513510465622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.07070982456207275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.06019206717610359, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.08254807442426682, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.07513844966888428, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.07404763996601105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.051416702568531036, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04743669927120209, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.04261477291584015, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.036709900945425034, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03473076596856117, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.034262511879205704, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.02175251580774784, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.019143594428896904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.019075781106948853, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.015681035816669464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.015362889505922794, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.012555730529129505, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.013395422138273716, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.011974206194281578, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.010665902867913246, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09464513510465622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09464513510465622, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.60.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.1577051430940628, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1474185734987259, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.14425083994865417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.130820631980896, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.07496801763772964, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07121637463569641, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08369054645299911, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07725010067224503, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.07608716934919357, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06720852106809616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.06379739940166473, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04324156045913696, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03780399635434151, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03677436709403992, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03653218597173691, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.021833257749676704, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.02032492123544216, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02025669626891613, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.018902169540524483, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.018767863512039185, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.012383808381855488, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.014148145914077759, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.012064984999597073, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.011448153294622898, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08369054645299911, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.08369054645299911, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.60.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.16893823444843292, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.1580926924943924, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.15464869141578674, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.1402987688779831, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.08027685433626175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.07631783932447433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0898388922214508, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.08271826803684235, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.08149644732475281, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.07200516760349274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.0685269683599472, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.04641467332839966, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.040493883192539215, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03939694166183472, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.039141204208135605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.02349652722477913, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.021756265312433243, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.02168140932917595, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.020234474912285805, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.020083405077457428, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.013432804495096207, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.015118211507797241, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.01309478934854269, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.012212688103318214, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0898388922214508, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.0898388922214508, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.60.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.19449463486671448, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.16948747634887695, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.1597457081079483, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.14136777818202972, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.08913520723581314, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.07926701009273529, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.10952115058898926, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.09847398847341537, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.09323197603225708, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.07580691576004028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.07172702997922897, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.0562499463558197, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.04815276339650154, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.04383353516459465, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.04278818145394325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.028855979442596436, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.024327922612428665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.024104634299874306, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.021895090118050575, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.021251939237117767, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.016841476783156395, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.017421230673789978, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.01549201924353838, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.013761810958385468, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.10952115058898926, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.10952115058898926, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.61.self_attn.q_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.11716917902231216, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10946128517389297, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.10573579370975494, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.09558723866939545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.05503920093178749, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.051600974053144455, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.06424327939748764, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.058853112161159515, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.0559115894138813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.04924304783344269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.046867698431015015, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.032718464732170105, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.02817756123840809, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.026442579925060272, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02602660283446312, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.016401449218392372, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.013650931417942047, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01346143800765276, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.012475615367293358, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01220450084656477, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.008650395087897778, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.00847918726503849, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.008107423782348633, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.00557346036657691, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10946128517389297, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10946128517389297, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.61.self_attn.k_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09222707897424698, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.08610650897026062, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.08292984962463379, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.07493048161268234, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.04326966032385826, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.04041029512882233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.05079801008105278, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.04666709154844284, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.04395810887217522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.038688503205776215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.036814648658037186, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.025857623666524887, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.022359037771821022, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.020814131945371628, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.02043212205171585, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.012978475540876389, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.010788428597152233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.010614716447889805, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.009865782223641872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.009629474952816963, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.006894455756992102, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.0068051815032958984, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.006403924897313118, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.004549156408756971, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09222707897424698, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.09222707897424698, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.61.self_attn.v_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.24114950001239777, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.22533723711967468, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.22002354264259338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.1991535723209381, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.11391478776931763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.10785628855228424, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.13018809258937836, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.11801435798406601, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11567354202270508, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.10185044258832932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.09695649147033691, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.06650546193122864, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.056768253445625305, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.05479668080806732, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.054298702627420425, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.03363877534866333, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.028251660987734795, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.028073295950889587, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.025799095630645752, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.02547399513423443, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.017657862976193428, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.017107397317886353, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01690993644297123, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.011180724017322063, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11567354202270508, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.11567354202270508, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.61.self_attn.o_proj", - "numel": 26214400, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.17529296875, - "total_bits": 57024000.0, - "err": 0.13878662884235382, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10565361380577087, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 68820480.0, - "err": 0.09329184889793396, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.72529296875, - "total_bits": 71441920.0, - "err": 0.08148816227912903, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.22529296875, - "total_bits": 84549120.0, - "err": 0.06147114187479019, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.72529296875, - "total_bits": 97656320.0, - "err": 0.0455855093896389, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 79464320.0, - "err": 0.08150555193424225, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 81927680.0, - "err": 0.06996043026447296, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.17529296875, - "total_bits": 83238400.0, - "err": 0.06603904813528061, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.52529296875, - "total_bits": 92413440.0, - "err": 0.0456317663192749, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.662646484375, - "total_bits": 96014080.0, - "err": 0.04416261985898018, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 105678720.0, - "err": 0.04173785448074341, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 108142080.0, - "err": 0.03543267771601677, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.22529296875, - "total_bits": 110763520.0, - "err": 0.03171462193131447, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.32529296875, - "total_bits": 113384960.00000001, - "err": 0.030791109427809715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 131893120.0, - "err": 0.022377699613571167, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.22529296875, - "total_bits": 136977920.0, - "err": 0.019541088491678238, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.32529296875, - "total_bits": 139599360.0, - "err": 0.01933440752327442, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.52529296875, - "total_bits": 144842240.0, - "err": 0.017109161242842674, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.72529296875, - "total_bits": 150085120.0, - "err": 0.01659405790269375, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 158107520.0, - "err": 0.014331627637147903, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 160570880.0, - "err": 0.015787625685334206, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.2313232421875, - "total_bits": 163350400.0, - "err": 0.01337422989308834, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 212999679.99999997, - "err": 0.013853316195309162, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10565361380577087, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 62266880.00000001, - "err": 0.10565361380577087, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.61.mlp.gate_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.14564822614192963, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.13615527749061584, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.1331133395433426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.12051626294851303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.06871099770069122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.06521597504615784, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.07694800943136215, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.07088286429643631, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.06976636499166489, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.06143851578235626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.05834649130702019, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.039275236427783966, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03398699685931206, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.033000148832798004, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.03276707977056503, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.01964317075908184, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.017087921500205994, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.01701914332807064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.015621613711118698, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.01547289453446865, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.010367803275585175, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.010492920875549316, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.010038715787231922, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.007139108143746853, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.07694800943136215, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.07694800943136215, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.61.mlp.up_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1751085069444445, - "total_bits": 153951744.0, - "err": 0.13939067721366882, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.3751085069444446, - "total_bits": 168107520.0, - "err": 0.13030026853084564, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.6251085069444446, - "total_bits": 185802240.0, - "err": 0.12735530734062195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.11534757167100906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.2251085069444443, - "total_bits": 228269568.0, - "err": 0.06619574874639511, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7251085069444443, - "total_bits": 263659008.0, - "err": 0.06282953172922134, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.031277126736111, - "total_bits": 214550400.0, - "err": 0.07423404604196548, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.1251085069444446, - "total_bits": 221191680.0, - "err": 0.0682879090309143, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1751085069444445, - "total_bits": 224730624.0, - "err": 0.0672004446387291, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.5251085069444446, - "total_bits": 249503232.0, - "err": 0.05922003462910652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6625542534722224, - "total_bits": 259231488.0, - "err": 0.05633363872766495, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031277126736111, - "total_bits": 285329280.0, - "err": 0.03833315148949623, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125108506944445, - "total_bits": 291970560.0, - "err": 0.03321199119091034, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.225108506944444, - "total_bits": 299048448.0, - "err": 0.03226221352815628, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.325108506944445, - "total_bits": 306126336.0, - "err": 0.0320429801940918, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031277126736111, - "total_bits": 356108160.0, - "err": 0.019336992874741554, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.225108506944444, - "total_bits": 369827328.0, - "err": 0.01748310774564743, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.325108506944445, - "total_bits": 376905216.0, - "err": 0.01741674728691578, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.525108506944444, - "total_bits": 391060992.0, - "err": 0.01616964302957058, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.725108506944444, - "total_bits": 405216768.0, - "err": 0.016032127663493156, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031277126736111, - "total_bits": 426887040.0, - "err": 0.010867214761674404, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125108506944445, - "total_bits": 433528320.0, - "err": 0.011782326735556126, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.231277126736111, - "total_bits": 441042816.0, - "err": 0.010570920072495937, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125108506944445, - "total_bits": 575086080.0, - "err": 0.009180519729852676, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.11534757167100906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.7251085069444443, - "total_bits": 192880128.0, - "err": 0.11534757167100906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - } - }, - { - "key": "model.layers.61.mlp.down_proj", - "numel": 70778880, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.08938180655241013, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.37529296875, - "total_bits": 168120576.0, - "err": 0.07716738432645798, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.62529296875, - "total_bits": 185815296.0, - "err": 0.07152314484119415, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.727144820601852, - "total_bits": 193024256.0, - "err": 0.06335359811782837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.227144820601852, - "total_bits": 228413696.0, - "err": 0.04080098494887352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.736404079861111, - "total_bits": 264458495.99999997, - "err": 0.035618122667074203, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313232421875, - "total_bits": 214553664.0, - "err": 0.05311054736375809, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.12529296875, - "total_bits": 221204736.0, - "err": 0.04679091274738312, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1715892650462965, - "total_bits": 224481536.0, - "err": 0.042937908321619034, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.523441116898148, - "total_bits": 249385216.0, - "err": 0.03476553037762642, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6644983362268517, - "total_bits": 259369088.0, - "err": 0.03323355317115784, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.0313232421875, - "total_bits": 285332544.0, - "err": 0.02737562544643879, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.12529296875, - "total_bits": 291983616.0, - "err": 0.023242603987455368, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.227144820601852, - "total_bits": 299192576.0, - "err": 0.02059062384068966, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3289966724537035, - "total_bits": 306401536.0, - "err": 0.019920213147997856, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.0313232421875, - "total_bits": 356111424.0, - "err": 0.014245348051190376, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.227144820601852, - "total_bits": 369971456.0, - "err": 0.012186803855001926, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.310478153935185, - "total_bits": 375869696.0, - "err": 0.012026181444525719, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.523441116898148, - "total_bits": 390942976.0, - "err": 0.011187940835952759, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.727144820601852, - "total_bits": 405360896.0, - "err": 0.010825080797076225, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.0313232421875, - "total_bits": 426890304.0, - "err": 0.008557661436498165, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.12529296875, - "total_bits": 433541376.0, - "err": 0.009537642821669579, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.235026945891204, - "total_bits": 441308224.0, - "err": 0.00771605409681797, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.12529296875, - "total_bits": 575099136.0, - "err": 0.008101975545287132, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ], - "best_option_max": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.08938180655241013, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - "best_option": { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1715892650462965, - "total_bits": 153702656.0, - "err": 0.08938180655241013, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - } - } - ], - "last_module_idx": 126, - "base_perplexity": 6.733563458685312, - "reuse_measurement": "./quantsIambeRP/measurement.json", - "cal_filename": "/home/raven/exllamav2/cdpoquants/cal_data.safetensors", - "q_last_module_idx": 126, - "cal_perplexity": 6.980928606533813 -} \ No newline at end of file