diff --git "a/measurement-Toppy-M-7B.json" "b/measurement-Toppy-M-7B.json" new file mode 100644--- /dev/null +++ "b/measurement-Toppy-M-7B.json" @@ -0,0 +1,103530 @@ +{ + "measurement": [ + { + "key": "model.layers.0.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.009975614957511425, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.008488782681524754, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.004255140200257301, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.00449338648468256, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.004493127577006817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.001870558364316821, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.009348634630441666, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.008394616656005383, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.00466539291664958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.004087334033101797, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.004252967890352011, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.00442176079377532, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.004085726570338011, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0024612066335976124, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.0019019781611859798, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0023637523408979177, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.0016977305058389902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.0014587141340598464, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0016471518902108073, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0014116233214735985, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0015682769007980824, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.001646726275794208, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.001268200110644102, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0013923755614086986, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.009975614957511425, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.009975614957511425, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.010014334693551064, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.008531960658729076, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.004226046614348888, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.004454968497157097, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.004454681649804115, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.0017493332270532846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.00956466794013977, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.008419059216976166, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.004611292388290167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.00400036945939064, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.004181440453976393, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.004369073081761599, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.003998170606791973, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.0023615702521055937, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.0017480256501585245, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0022941753268241882, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.0014962386339902878, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0012122653424739838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0014350070850923657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0011502004927024245, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0014297766610980034, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0014346875250339508, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.001085722353309393, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0011239019222557545, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.010014334693551064, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.010014334693551064, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10337915271520615, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06304154545068741, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.03981944918632507, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.044737886637449265, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04468102008104324, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.022614995017647743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.06884756684303284, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.056997645646333694, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04866616055369377, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02768649533390999, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03284129872918129, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.03730938211083412, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02730310894548893, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.021695690229535103, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.020150600001215935, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.019102269783616066, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.011463089846074581, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009964178316295147, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008276176638901234, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0070175910368561745, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.009775478392839432, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.008210395462810993, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0064743272960186005, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.005399079993367195, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10337915271520615, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.10337915271520615, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1040986180305481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06835802644491196, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.047903113067150116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.04657775163650513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04466158151626587, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.025249851867556572, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06558244675397873, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0583258718252182, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04925146698951721, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02919142134487629, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.031339969485998154, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.033544644713401794, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028188010677695274, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.022196616977453232, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02053023688495159, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016973672434687614, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012643820606172085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011537737213075161, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010091042146086693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.009002720937132835, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009432457387447357, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009951298125088215, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007572196889668703, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.007745177019387484, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1040986180305481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1040986180305481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.09539234638214111, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.08966430276632309, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.08788596093654633, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.08129473030567169, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.04264277592301369, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.04111403226852417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.04730578884482384, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.04364188015460968, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.042986296117305756, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.039277058094739914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0382271483540535, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.02391854301095009, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.02085079252719879, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.020438790321350098, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.02033623307943344, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.012029459699988365, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.010962780565023422, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.010875627398490906, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.010367894545197487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.010309835895895958, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.006696693599224091, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.0072415354661643505, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.006567724980413914, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00556176295503974, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.09539234638214111, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.09539234638214111, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.11656881123781204, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1109113022685051, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.10920602083206177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1010214239358902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.0524706169962883, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.05096636712551117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.057773247361183167, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.05340830236673355, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.05281117558479309, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.048705413937568665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0473613366484642, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.029073474928736687, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.025247754529118538, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.024868350476026535, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.024775201454758644, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.014490220695734024, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.012731313705444336, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.012636883184313774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.012002358213067055, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.011948373168706894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.007592977024614811, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.007578418590128422, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.007467755116522312, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.004982172977179289, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1010214239358902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1010214239358902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.0.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.08065692335367203, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.06929779052734375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.0624423511326313, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.0557401180267334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.0360322967171669, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.03079933673143387, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.04988919198513031, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.04321947693824768, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.03749258816242218, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.030316857621073723, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.02935834415256977, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.024326445534825325, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.02091114968061447, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.018101021647453308, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.017392052337527275, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.012797816656529903, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.010789284482598305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.0104824872687459, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.009870224632322788, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.009502410888671875, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.008062930777668953, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.008547881618142128, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.007359318435192108, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.007269620429724455, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.08065692335367203, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.08065692335367203, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.020120782777667046, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.013727654702961445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.007899479940533638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.008554661646485329, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.008272488601505756, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0037762834690511227, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.014223960228264332, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.012873813509941101, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.009368854574859142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.006287196651101112, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.006738950498402119, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.007190204691141844, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.006139583885669708, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.004132270347326994, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.0035101978573948145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0036239312030375004, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.002382460283115506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.001989922719076276, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0020542507991194725, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0016737673431634903, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0019784383475780487, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0020319719333201647, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0013445394579321146, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.001498152269050479, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.020120782777667046, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.020120782777667046, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.01752626523375511, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.012258165515959263, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.006757559720426798, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.007328596897423267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.007121178787201643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.003031461965292692, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.013048873282968998, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.011664495803415775, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.00810120441019535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.005645262077450752, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.006068518850952387, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.006532107945531607, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.005543030798435211, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.0035411808639764786, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.0028936800081282854, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.003293932881206274, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.002016696846112609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0016293812077492476, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0017684509512037039, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.001372164231725037, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0017520864494144917, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0017519836546853185, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0010836714645847678, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0012344353599473834, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.01752626523375511, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.01752626523375511, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1289050430059433, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08658798784017563, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06330502033233643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06105963513255119, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.05545496940612793, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03357243165373802, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.07885343581438065, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0714002177119255, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.06102466210722923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03758958727121353, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03852061182260513, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.040196508169174194, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.034086160361766815, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02688555419445038, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.024888036772608757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.020091740414500237, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.014133032411336899, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.012393409386277199, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010720429010689259, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009182706475257874, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.010425008833408356, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.010106212459504604, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.007829021662473679, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.006505022756755352, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08658798784017563, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08658798784017563, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.15737372636795044, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1266971081495285, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.11393160372972488, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09447675198316574, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.07227501273155212, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.059601761400699615, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09112956374883652, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08082984387874603, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.07486294955015182, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05293891206383705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04944320023059845, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04695533588528633, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.039388258010149, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.03560546413064003, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.03468620404601097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.023753192275762558, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.019784551113843918, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.018912820145487785, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.016371114179491997, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.0157622117549181, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01329388190060854, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014279390685260296, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0120733343064785, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011332832276821136, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09447675198316574, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09447675198316574, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.14755277335643768, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14018087089061737, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13784874975681305, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12639650702476501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06818240135908127, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0659828931093216, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0753614604473114, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06941039115190506, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06859716027975082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06249415501952171, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06046677380800247, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03859868273139, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03361477702856064, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.033100686967372894, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03298146650195122, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019493451341986656, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018082736060023308, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01796696148812771, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017046097666025162, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01699230819940567, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011055590584874153, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012312906794250011, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010924716480076313, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00984986498951912, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0753614604473114, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0753614604473114, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.18414166569709778, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1754598766565323, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17281630635261536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.15879228711128235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08543118089437485, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0828864648938179, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09413658827543259, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08674109727144241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08590972423553467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07844393700361252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07589642703533173, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04800347238779068, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04170664772391319, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.041175421327352524, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04103710874915123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024060066789388657, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.021972883492708206, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02182134985923767, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.020663440227508545, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.020597266033291817, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013212785124778748, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014337838627398014, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.013041284866631031, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010924460366368294, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09413658827543259, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09413658827543259, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.1.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.016235945746302605, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.015640774741768837, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.005952054169028997, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.0055327326990664005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.004776772111654282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0029296891298145056, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.01865343004465103, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.015086260624229908, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.004893035627901554, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.004425722174346447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.004446011036634445, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.004348946735262871, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.004141027573496103, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.0029543275013566017, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.0022175218909978867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0026048244908452034, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.0019151190062984824, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.0010313817765563726, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.0018718185601755977, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.0009452950325794518, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0018507397035136819, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.0018369409954175353, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.0007251020288094878, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.0008258139132522047, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.016235945746302605, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.016235945746302605, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.051718153059482574, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0409250482916832, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.034781236201524734, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.031382955610752106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.022835157811641693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.017462387681007385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.031302887946367264, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.02845756895840168, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.02437676303088665, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.01778547465801239, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.017547301948070526, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.01585587114095688, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.013583417050540447, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.011058686301112175, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.010389599949121475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.007932848297059536, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.005864677019417286, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.005369941703975201, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0048939986154437065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.004435967653989792, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.004138810094445944, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.004151965957134962, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.003257984761148691, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.00280040898360312, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.051718153059482574, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.051718153059482574, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.052827488631010056, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.04018547385931015, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.032212015241384506, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.029644159600138664, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.022865550592541695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.016130072996020317, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03299324959516525, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.029960962012410164, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.024731317535042763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.01745537854731083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.017494743689894676, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.016653597354888916, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.014282040297985077, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.011107730679214, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.010241350159049034, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.00832257978618145, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.0059300633147358894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.005285206250846386, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.004903706721961498, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.00428596418350935, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004343597684055567, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004353415220975876, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0032540392130613327, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0029080777894705534, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.052827488631010056, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.052827488631010056, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.15699945390224457, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1315046101808548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1210164725780487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.10734879225492477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.07172398269176483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.060887016355991364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.08826237916946411, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.07981418818235397, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.07446955144405365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.05702754110097885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.05460914224386215, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04482468590140343, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.03820148855447769, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.03444910794496536, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.03352254629135132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.02241000533103943, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.017732124775648117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.016863809898495674, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.014750064350664616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.014103386551141739, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.011686692014336586, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.011220112442970276, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.010315909050405025, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0071045211516320705, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.10734879225492477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.10734879225492477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.17834922671318054, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.15671351552009583, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.14808504283428192, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.12187489122152328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.08325686305761337, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.07489894330501556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09968981146812439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08981907367706299, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.08503549546003342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06616200506687164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.060840800404548645, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05157294124364853, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04396386072039604, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04100678861141205, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.040272586047649384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.026388950645923615, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.022839361801743507, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02225232869386673, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01979055628180504, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.019346999004483223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01529568713158369, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016332317143678665, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014413350261747837, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013138427399098873, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09968981146812439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09968981146812439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1937594711780548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.18320132791996002, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17991909384727478, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.16391168534755707, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.09072764217853546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0873531699180603, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10040581971406937, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09246152639389038, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.09141788631677628, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08202394843101501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07863375544548035, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0512576699256897, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04436664655804634, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04358825460076332, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04341015964746475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.025650838389992714, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02283993549644947, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.0226303581148386, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.021133527159690857, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.021023239940404892, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013820537365972996, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014338858425617218, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.013568245805799961, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010303975082933903, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10040581971406937, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10040581971406937, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.22437214851379395, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21245303750038147, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20874232053756714, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19028419256210327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10503249615430832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10121429711580276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11586420983076096, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10696004331111908, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10579343885183334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09499920159578323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09094953536987305, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05892137065529823, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05105090141296387, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.050211913883686066, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.050017569214105606, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029419496655464172, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.025803426280617714, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.025564786046743393, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02378004975616932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02365603856742382, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01550007052719593, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01546778529882431, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015223451890051365, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01029705349355936, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10696004331111908, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10696004331111908, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.2.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.17081937193870544, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.14749746024608612, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1379811018705368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1226530447602272, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.07684372365474701, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.06784476339817047, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09323187917470932, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.08484826236963272, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.07975950837135315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0639876127243042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.06131540238857269, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.047438476234674454, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04070360213518143, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.03712056577205658, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.036246348172426224, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.023884059861302376, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.019829969853162766, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.019405318424105644, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.017409667372703552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.016842106357216835, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.013140448369085789, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.013403676450252533, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.012006598524749279, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.009813441894948483, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09323187917470932, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09323187917470932, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.042294941842556, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.03562586009502411, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.031779855489730835, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.02826443873345852, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.019190866500139236, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.015913832932710648, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.02525634691119194, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.022972941398620605, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.0199971292167902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.015530762262642384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.015067890286445618, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.012838976457715034, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.01097223348915577, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.009294355288147926, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.008854367770254612, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.006434502080082893, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.004924617242068052, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.004602309316396713, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.00421915901824832, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.003930081147700548, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0034059917088598013, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0034109526313841343, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0028211537282913923, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0023433989845216274, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.042294941842556, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.042294941842556, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.041703034192323685, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.03412956744432449, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.02928028628230095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.026119058951735497, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.01853426732122898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.014599998481571674, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.025679677724838257, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.023314977064728737, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.01961858943104744, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.014806545339524746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.014575829729437828, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.013015391305088997, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.011124670505523682, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.00896154809743166, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.008379545994102955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.006506350822746754, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.004703724291175604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.004293422680348158, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.003973417915403843, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0035688236821442842, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.003381773130968213, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.003304988844320178, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.002621112857013941, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0021298278588801622, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.041703034192323685, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.041703034192323685, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1677691489458084, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.14337500929832458, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.13357168436050415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.11794490367174149, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0769491121172905, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.06701435148715973, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0924617275595665, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08485705405473709, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.07952982187271118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.06217118725180626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.059256646782159805, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.04695715382695198, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.040466196835041046, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.036919210106134415, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.03602612018585205, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.023465493693947792, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.018886718899011612, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.018066417425870895, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.015864629298448563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.015241431072354317, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012059420347213745, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01171113271266222, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.010768743231892586, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007223820313811302, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0924617275595665, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0924617275595665, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.17262853682041168, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.15153519809246063, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.14382557570934296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.11840501427650452, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.08010023832321167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.07220814377069473, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09412501007318497, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08561352640390396, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.08211029320955276, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06376594305038452, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.05783259868621826, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.048492349684238434, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0415244922041893, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.039095643907785416, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.038498327136039734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.024466367438435555, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.021252058446407318, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.020718155428767204, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01824287138879299, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01786448247730732, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01361089013516903, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014601271599531174, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.012849939987063408, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011318158358335495, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09412501007318497, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09412501007318497, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2011285424232483, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1901583969593048, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1867762953042984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.17024201154708862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.09460818022489548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.09102725982666016, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10464954376220703, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.09648779034614563, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.09535173326730728, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0854184478521347, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08190169930458069, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.053414538502693176, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04622931033372879, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.045403193682432175, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.045207228511571884, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.026717016473412514, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02362404204905033, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.023405462503433228, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.021808339282870293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02168286219239235, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.014289123937487602, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014608166180551052, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01402254868298769, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010235561989247799, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10464954376220703, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10464954376220703, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23458731174468994, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22198590636253357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21809077262878418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19883309304714203, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11038140952587128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10625239461660385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12205179035663605, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11243285983800888, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11120155453681946, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09972204267978668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09562975913286209, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06223702430725098, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0537760891020298, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0528535433113575, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05263882130384445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03111019916832447, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.027211973443627357, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026960698887705803, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025065599009394646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02493007853627205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016511021181941032, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016384750604629517, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016215458512306213, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01097786333411932, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09972204267978668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09972204267978668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.3.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.19453702867031097, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16987751424312592, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.15987223386764526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.14207282662391663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.08817525953054428, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.07860172539949417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10605832189321518, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09648345410823822, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09118377417325974, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07396799325942993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07054862380027771, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.053827736526727676, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.046232156455516815, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04249752685427666, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04159320890903473, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.027114950120449066, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.022497372701764107, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02205835096538067, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.019783105701208115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.019197281450033188, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.014808494597673416, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.014901298098266125, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.013603849336504936, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.010687515139579773, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10605832189321518, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10605832189321518, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.05743385851383209, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.048960622400045395, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.04392483830451965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.03924497216939926, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.026150286197662354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.02191748656332493, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.034582898020744324, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03124912828207016, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.02722134254872799, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.021491676568984985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.020900515839457512, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.017635663971304893, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.014984658919274807, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.012697979807853699, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.01210580300539732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.00884998869150877, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.006780932657420635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.006360716186463833, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.005894201807677746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.00551276421174407, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.004717457573860884, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.004755120258778334, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.00391767593100667, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.003353662556037307, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.05743385851383209, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.05743385851383209, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.05425762012600899, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0449802502989769, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.03936252370476723, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.03525886684656143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.024208582937717438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.01958029344677925, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0330955870449543, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.029854362830519676, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.02562055177986622, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.019655661657452583, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.019288355484604836, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01684214547276497, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.014273314736783504, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.011753175407648087, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.011083864606916904, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.008449837565422058, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.006243593990802765, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.005786938592791557, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.005371856968849897, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0049231513403356075, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004453014582395554, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0044190045446157455, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.00355046265758574, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003015480237081647, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.05425762012600899, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.05425762012600899, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.18307174742221832, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1587740033864975, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1493314802646637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.13257893919944763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.08435791730880737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.07471268624067307, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10001546144485474, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09178847819566727, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.08691093325614929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.06931160390377045, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.06585875153541565, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05083505064249039, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04387981817126274, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.040446821600198746, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.0396023765206337, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.025395208969712257, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.020750224590301514, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.019968383014202118, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.017694802954792976, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.017115198075771332, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01316023524850607, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012863018549978733, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.011927603743970394, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008133644238114357, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10001546144485474, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10001546144485474, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.17427122592926025, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1511039435863495, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.14234250783920288, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.11844752728939056, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.08129159361124039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.07252970337867737, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09687907993793488, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.08692467957735062, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.08298248797655106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06397024542093277, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.05826693773269653, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04997267946600914, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04245300963521004, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.03989905118942261, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.03928166627883911, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.025337014347314835, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02192998118698597, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02135726995766163, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.018782667815685272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01838533580303192, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014239390380680561, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015355139970779419, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.013432981446385384, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012129412963986397, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09687907993793488, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.09687907993793488, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.18463380634784698, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17382854223251343, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.17017312347888947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.15477241575717926, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08697831630706787, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.08324351161718369, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09702508896589279, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08927309513092041, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08778613805770874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07804857194423676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.074758380651474, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04950307682156563, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04278641939163208, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04176831617951393, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.041517358273267746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02480500377714634, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.021688397973775864, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02143215946853161, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01990080252289772, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01975131221115589, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013280585408210754, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013366883620619774, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012940040789544582, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009247073903679848, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09702508896589279, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09702508896589279, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23616330325603485, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22276726365089417, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21849097311496735, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19894841313362122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11124847829341888, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10674288868904114, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12353036552667618, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11378879845142365, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11221189796924591, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10007819533348083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0958113819360733, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06296104192733765, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05440172553062439, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.053280044347047806, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.053018949925899506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03149491176009178, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02733246237039566, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02703462354838848, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025050822645425797, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024884123355150223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016617469489574432, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01632639393210411, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016246145591139793, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010694599710404873, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10007819533348083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10007819533348083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.4.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.20099958777427673, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.17688941955566406, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.16724321246147156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.14871613681316376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.0915033370256424, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08210384100675583, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10991910099983215, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09958397597074509, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09437920153141022, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07733453065156937, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07373321056365967, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05598289147019386, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04785412549972534, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04422769322991371, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04332263022661209, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02823869325220585, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02357562631368637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02314242348074913, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020882174372673035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.02031654864549637, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015493057668209076, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01577710546553135, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014308291487395763, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011564629152417183, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09958397597074509, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09958397597074509, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06984193623065948, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06128278002142906, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.05652153864502907, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.050507787615060806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.0320378802716732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.028006715700030327, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.040665242820978165, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03694961965084076, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.03301488980650902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02696526236832142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.025982346385717392, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.020630542188882828, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0176674984395504, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.015484539791941643, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.014928890392184258, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010336522944271564, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.008173620328307152, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.007776404730975628, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007188740652054548, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.006833717692643404, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005485439673066139, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005521021783351898, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0047173164784908295, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0038254123646765947, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06984193623065948, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06984193623065948, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.062182940542697906, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.053613096475601196, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04844026267528534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.04318249970674515, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.028163336217403412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.023928215727210045, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03675394132733345, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.033544525504112244, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.029216080904006958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.023386405780911446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.022660497575998306, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01863229088485241, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.015963692218065262, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.013559293001890182, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.012934897094964981, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009313655085861683, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.007037935312837362, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.006585711147636175, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006088148802518845, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.00566308805719018, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004821655806154013, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004701665602624416, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.003973550163209438, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0029914260376244783, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.062182940542697906, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.062182940542697906, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.19783452153205872, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17708274722099304, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16954000294208527, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15104222297668457, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09187225252389908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08397407084703445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10599040985107422, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09753518551588058, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09382554143667221, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07788558304309845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07373463362455368, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.053762372583150864, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04651828110218048, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.043979253619909286, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04335630312561989, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.026836998760700226, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.0223083458840847, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.021693099290132523, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019393226131796837, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.018983598798513412, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013775262050330639, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013271255418658257, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01284829992800951, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00800788588821888, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10599040985107422, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10599040985107422, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20275743305683136, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1834781914949417, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17688550055027008, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1520131528377533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09487146884202957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08800031244754791, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10816437751054764, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0989747941493988, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09624268859624863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07924165576696396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07274842262268066, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05562342330813408, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.047667935490608215, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04581518471240997, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.045361392199993134, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028019540011882782, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02413160726428032, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023692727088928223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021069301292300224, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02079598233103752, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015276182442903519, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015497680753469467, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01465674489736557, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011208412237465382, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10816437751054764, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10816437751054764, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1824255734682083, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.17183662950992584, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.16830110549926758, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.15303486585617065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08589112758636475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.08224369585514069, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09558677673339844, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08818092197179794, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08667086064815521, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0770999938249588, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07378226518630981, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.048671700060367584, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04217465594410896, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.041153684258461, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.0409175343811512, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.024354595690965652, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02120179869234562, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02094906195998192, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.019423367455601692, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01927139051258564, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012862295843660831, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01282286737114191, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012522500939667225, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008567074313759804, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09558677673339844, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09558677673339844, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23562699556350708, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22246181964874268, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21821220219135284, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1986142098903656, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11103695631027222, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10656463354825974, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12315285950899124, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11356885731220245, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11197555065155029, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09994680434465408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09573263674974442, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06271574646234512, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05427277088165283, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.053135935217142105, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05287376418709755, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03133854269981384, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.027197392657399178, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026905644685029984, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024933192878961563, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02476518601179123, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01642548106610775, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016163567081093788, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016048315912485123, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010466014966368675, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09994680434465408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09994680434465408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.5.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.20682096481323242, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18236041069030762, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17255647480487823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.15368838608264923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09430406987667084, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08472182601690292, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11319317668676376, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10262284427881241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09730279445648193, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07984146475791931, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07629012316465378, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05781795084476471, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0492582730948925, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.0454895943403244, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.0445643812417984, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029193075373768806, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.024075299501419067, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.023628775030374527, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02128867618739605, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.020697927102446556, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016030363738536835, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01590917445719242, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01477078627794981, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01143417414277792, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10262284427881241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10262284427881241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06542223691940308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.05707644298672676, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.051773715764284134, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.04623473435640335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.029987717047333717, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.02563956379890442, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.039438396692276, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.03570997342467308, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.031013429164886475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02515239827334881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.02439357154071331, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.020068923011422157, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.017106279730796814, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.014514341950416565, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.013843486085534096, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010042818263173103, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.007688838988542557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.007220469415187836, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.006756722927093506, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.006316245533525944, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.0052986531518399715, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00531887449324131, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.004372025839984417, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0036510839127004147, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06542223691940308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.06542223691940308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.059238500893116, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05080719292163849, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04514222964644432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.0403277762234211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.026832781732082367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.022294161841273308, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03615347668528557, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03287988156080246, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.027895038947463036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.02223825454711914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.02167508751153946, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01836339198052883, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.015688523650169373, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.01296156831085682, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.012244028970599174, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.00917545985430479, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.006790176499634981, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.006286722142249346, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00589108606800437, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.005408178083598614, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004772440530359745, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004694673232734203, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.003803394502028823, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0030555075500160456, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.059238500893116, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.059238500893116, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1826918125152588, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16364073753356934, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1558411568403244, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1386205554008484, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0849544107913971, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.07720434665679932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09972602128982544, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09161878377199173, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.0867108628153801, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07195204496383667, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.06821306049823761, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.050639647990465164, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04374057799577713, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.0406850203871727, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.03993486240506172, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.025288734585046768, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02076067589223385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02009781263768673, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018085619434714317, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.01759190857410431, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012969487346708775, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012612754479050636, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.011859612539410591, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007774283643811941, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09972602128982544, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09972602128982544, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.19809205830097198, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1776544153690338, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17005833983421326, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.145684152841568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09276533871889114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08514437824487686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10811932384967804, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0979774072766304, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09426514059305191, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07650713622570038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07105918973684311, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05556328967213631, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.047325585037469864, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04495968297123909, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04437363147735596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027925631031394005, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.023884443566203117, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023359574377536774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020761944353580475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.020387906581163406, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015109673142433167, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015701832249760628, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014320469461381435, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011590329930186272, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10811932384967804, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10811932384967804, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1733173429965973, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1634102165699005, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1600279062986374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1457226574420929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08171023428440094, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.078221894800663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09107698500156403, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08401226997375488, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08241980522871017, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07344447821378708, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07029243558645248, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04643377661705017, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04020800441503525, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03918032348155975, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03893120586872101, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023239050060510635, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02025502920150757, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020015893504023552, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01859547756612301, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018444979563355446, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012324364855885506, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01236823108047247, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011974765919148922, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008389930240809917, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09107698500156403, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09107698500156403, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23337747156620026, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22049321234226227, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2162851095199585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19702966511249542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11015535891056061, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.1057172492146492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12229368835687637, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11281431466341019, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11103533208370209, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09920766204595566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09497687965631485, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.062274303287267685, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.053932271897792816, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0527280755341053, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05245404690504074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03112686797976494, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.027022989466786385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026721978560090065, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024789657443761826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024609403684735298, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01632208749651909, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016110004857182503, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015915896743535995, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01048833504319191, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09920766204595566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09920766204595566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.6.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.21014495193958282, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18710482120513916, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17803911864757538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.15859226882457733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09629904478788376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08740169554948807, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11485899239778519, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10388802736997604, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09905700385570526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08223488181829453, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07855609059333801, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.058583155274391174, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.049941740930080414, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04650507867336273, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04566264897584915, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029476452618837357, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02475159242749214, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.024349240586161613, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022067919373512268, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.021538900211453438, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01606576330959797, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.0164578165858984, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014928078278899193, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012068324722349644, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10388802736997604, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10388802736997604, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07200644165277481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.06462214142084122, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.058807287365198135, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.05247070640325546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.03329518064856529, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.02890484407544136, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04455123096704483, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04005575552582741, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.03409300372004509, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.02858315035700798, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.027760718017816544, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.022727660834789276, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.019229521974921227, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.016149407252669334, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.015340035781264305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.011386371217668056, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.008600565604865551, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.008074240759015083, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007699549198150635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.007192086894065142, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006025875918567181, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0060375649482011795, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.004900250118225813, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004206799436360598, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07200644165277481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.07200644165277481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06153413653373718, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05456991493701935, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.04845105856657028, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.04311404004693031, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.028135240077972412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.023642441257834435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.038805365562438965, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03521283343434334, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.02895781770348549, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.023957980796694756, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.023443104699254036, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01968269608914852, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01685226336121559, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.013602828606963158, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.012721844017505646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009872005321085453, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.007148421835154295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0065763876773417, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006332302000373602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.005762089509516954, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0051398249343037605, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005033707246184349, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.003996007610112429, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0032772659324109554, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06153413653373718, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06153413653373718, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1975974142551422, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17932355403900146, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17236247658729553, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15326550602912903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09227083623409271, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08509287983179092, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1070748046040535, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09789183735847473, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09386712312698364, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07896020263433456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07486816495656967, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05447344854474068, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04673020541667938, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04420100525021553, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.043579164892435074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.027206338942050934, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.022526904940605164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.021947965025901794, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01979590579867363, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.019371645525097847, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014015662483870983, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013535109348595142, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013026650995016098, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008375168778002262, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1070748046040535, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1070748046040535, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20330992341041565, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18327569961547852, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1749708354473114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.15043160319328308, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09520716220140457, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08686427772045135, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11368837207555771, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10212311893701553, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09696335345506668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07963274419307709, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0742817148566246, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05866070091724396, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04959302768111229, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04648357629776001, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.045722078531980515, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02961617149412632, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02516639232635498, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.024511415511369705, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02224106900393963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.021763211116194725, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016504142433404922, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01719949021935463, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015534648671746254, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013158879242837429, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10212311893701553, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10212311893701553, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1743626743555069, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1646716296672821, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1614006757736206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1470973789691925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08246934413909912, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07903556525707245, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09180746972560883, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08451201021671295, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08316231518983841, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07419588416814804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07121922075748444, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.046986859291791916, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.040552038699388504, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03962455689907074, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.039415393024683, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023536058142781258, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020609410479664803, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020382529124617577, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01895984075963497, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018826143816113472, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012638173066079617, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012750166468322277, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012338261120021343, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008879470638930798, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09180746972560883, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09180746972560883, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23857425153255463, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22568930685520172, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2215263992547989, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.2019519805908203, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11296965926885605, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.108522430062294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1256536841392517, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11546512693166733, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11388575285673141, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10182130336761475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09769504517316818, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06415743380784988, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05533745512366295, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05422763526439667, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05396939814090729, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03214139863848686, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02805408090353012, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.027760393917560577, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02579340897500515, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02563190646469593, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.017204225063323975, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.017088254913687706, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016833346337080002, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01164298877120018, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10182130336761475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10182130336761475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.7.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.20904995501041412, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18515652418136597, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1754840761423111, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.15604233741760254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09571439027786255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08621280640363693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11474726349115372, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1040375754237175, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09858077764511108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08107820898294449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07749426364898682, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.0585942305624485, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.049931012094020844, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.046181004494428635, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04526224732398987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029458532109856606, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.0244477279484272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.023998819291591644, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021633222699165344, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.021043723449110985, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016035694628953934, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016157489269971848, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014822669327259064, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011626679450273514, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1040375754237175, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1040375754237175, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.0719265416264534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0639820545911789, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.05953281372785568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.05268871784210205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.033186137676239014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.029405269771814346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.04139896482229233, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0378136932849884, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.03400624915957451, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.028008902445435524, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.02675963006913662, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0210161991417408, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.018066072836518288, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0159959364682436, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.015478221699595451, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.010527465492486954, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.008374650962650776, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.007990885525941849, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.007354932837188244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.007011407520622015, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.005526630207896233, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.005542542785406113, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0047952658496797085, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0037468145601451397, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.0719265416264534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.0719265416264534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06221579387784004, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.05434826761484146, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.049710895866155624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.04373853653669357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.02834666520357132, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.024501986801624298, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.03622779622673988, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03312193602323532, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.029224159196019173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.023542363196611404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.02256356179714203, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.01839914731681347, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.015818430110812187, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.013648084364831448, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.013094943948090076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.009203126654028893, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.007103488780558109, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.006702553480863571, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.006153701338917017, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.005790377501398325, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.004792976193130016, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.004733833950012922, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.004035463556647301, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0031016406137496233, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06221579387784004, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.06221579387784004, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.18313787877559662, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16412800550460815, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1567838490009308, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.13844987750053406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.08493196964263916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.07748980820178986, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0993926003575325, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09074106067419052, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.08671803772449493, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07167275249958038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.06760327517986298, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05062654986977577, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.043402619659900665, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04070502147078514, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.040044430643320084, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.025271650403738022, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02089625783264637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0203094519674778, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01820031926035881, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.017755450680851936, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013055218383669853, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012828106060624123, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.012016916647553444, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008230863139033318, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0993926003575325, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.0993926003575325, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2018541395664215, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18529371917247772, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1789979487657547, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.15610027313232422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09476422518491745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08851615339517593, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10958462208509445, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09940088540315628, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09594808518886566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08088608086109161, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07525142282247543, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05625429004430771, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.047819193452596664, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0457269549369812, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04523342102766037, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028186844661831856, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.024019408971071243, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023571226745843887, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02128385752439499, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.020964914932847023, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015024437569081783, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015371724031865597, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01432853564620018, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010967668145895004, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09940088540315628, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09940088540315628, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.16317525506019592, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15405425429344177, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15095067024230957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1375526487827301, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07722343504428864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0739760547876358, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08619162440299988, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07922859489917755, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07790137827396393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06944423913955688, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06660127639770508, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04405590519309044, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.037989191710948944, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03708668053150177, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03687548637390137, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022061927244067192, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.019245896488428116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01902487315237522, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01768442615866661, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01755490154027939, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011806389316916466, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011847537010908127, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011505926959216595, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0081744734197855, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08619162440299988, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08619162440299988, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23496320843696594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.22198155522346497, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2177504450082779, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19836868345737457, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.11134252697229385, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10679793357849121, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12393264472484589, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11389077454805374, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11225400120019913, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10013575851917267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09605957567691803, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06338256597518921, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.054552383720874786, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05340595915913582, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05312766507267952, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03171273693442345, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.027514217421412468, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02721382863819599, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.025248970836400986, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.025082893669605255, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016870416700839996, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016633596271276474, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016488632187247276, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011141232214868069, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10013575851917267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.10013575851917267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.8.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.20420898497104645, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18266557157039642, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17403268814086914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1550641804933548, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09372342377901077, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08517754822969437, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1115306168794632, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10118982940912247, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.0963296964764595, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08033903688192368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07644571363925934, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.056954577565193176, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04870235174894333, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.045345649123191833, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04452100396156311, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.028850195929408073, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.024231601506471634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.023828107863664627, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021686706691980362, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.021169546991586685, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016072209924459457, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016220850870013237, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014985679648816586, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012003959156572819, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10118982940912247, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10118982940912247, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09193921834230423, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08216861635446548, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07665849477052689, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.06794057786464691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.042482808232307434, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.03782527521252632, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05294758081436157, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04814352095127106, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04347226023674011, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.036008402705192566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03447165712714195, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.026948433369398117, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.023079197853803635, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02054177038371563, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.019907265901565552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013523083180189133, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.010891984216868877, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.010427039116621017, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.009644661098718643, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.009240143932402134, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007219415158033371, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007354198023676872, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006322802510112524, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005208158399909735, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09193921834230423, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09193921834230423, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07609932124614716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06731006503105164, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.062087371945381165, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.054825544357299805, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03490925580263138, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.030519623309373856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04435752332210541, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04033737629652023, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03587133064866066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.029307791963219643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.028122875839471817, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.022525664418935776, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01930553838610649, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.01682344079017639, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01619136892259121, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011293104849755764, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.008760759606957436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.008299063891172409, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007647552527487278, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.007233193144202232, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005881127901375294, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005803702399134636, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.004993188660591841, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003819714765995741, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07609932124614716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07609932124614716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.18683023750782013, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17003920674324036, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16396766901016235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.14502425491809845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.087259940803051, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08096891641616821, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09979396313428879, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09154129028320312, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.08852479606866837, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0743856132030487, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07002202421426773, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.050601180642843246, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.043689072132110596, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04169245809316635, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04122502729296684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.025258328765630722, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02117338962852955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.020707881078124046, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018502991646528244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.018188269808888435, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012955417856574059, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012517362833023071, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.012213937938213348, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007634385954588652, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09979396313428879, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09979396313428879, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.19992324709892273, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18206284940242767, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.175556942820549, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.15115246176719666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09391553699970245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08738473802804947, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10807180404663086, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09836144745349884, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09522712230682373, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07871198654174805, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07352887094020844, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05549650639295578, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04726423695683479, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.045202720910310745, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04471298307180405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027744760736823082, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.023482339456677437, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.0230106208473444, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02044598199427128, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02012593299150467, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014642365276813507, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014684008434414864, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.013972727581858635, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010071605443954468, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10807180404663086, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10807180404663086, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.16316254436969757, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15352867543697357, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1502382755279541, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1367640346288681, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07714829593896866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07371117174625397, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08623801171779633, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0792839378118515, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07783940434455872, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06912853568792343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06624429672956467, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04413893073797226, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03807118162512779, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.037099748849868774, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03687494993209839, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022097671404480934, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.019378310069441795, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.019146569073200226, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01779049262404442, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.017646806314587593, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01186668872833252, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012116653844714165, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01155066303908825, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008558545261621475, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08623801171779633, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08623801171779633, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.22761179506778717, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21428874135017395, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20994853973388672, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19118353724479675, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10765624046325684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10298267751932144, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12021172046661377, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11030049622058868, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10863441228866577, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09650690108537674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09248284250497818, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06148946285247803, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05289074778556824, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.051671650260686874, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05139320343732834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030808093026280403, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02676232159137726, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026456797495484352, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02452014945447445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02434437721967697, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016530398279428482, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016377700492739677, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016126323491334915, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011211891658604145, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10765624046325684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10765624046325684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.9.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.21393850445747375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19191019237041473, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18301239609718323, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.16272158920764923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09833208471536636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0896526649594307, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11677443981170654, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10597196221351624, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10095719993114471, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08434119075536728, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08007531613111496, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05938952416181564, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05088112875819206, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04746203124523163, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.046608418226242065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02992810122668743, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02510923333466053, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.024688996374607086, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.022408291697502136, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.02186625823378563, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01635661907494068, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016503937542438507, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.015239814296364784, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011879878118634224, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10597196221351624, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10597196221351624, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08012890070676804, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.07184622436761856, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.06741131842136383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.059193260967731476, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.03713733330368996, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.03332807868719101, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.045504774898290634, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.041571542620658875, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.03789614886045456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03134014829993248, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.02974271960556507, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.023108813911676407, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.019873622804880142, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.017880702391266823, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.017388299107551575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.011568553745746613, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.009335960261523724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.008954574353992939, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.00818860623985529, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.007857789285480976, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.006074042059481144, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.006093244068324566, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.005368842743337154, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0041031804867088795, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08012890070676804, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.08012890070676804, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07183457165956497, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06376273185014725, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.05949610471725464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.052010275423526764, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.033029720187187195, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.029320066794753075, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04093895107507706, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.03726261481642723, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.033796168863773346, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.027596961706876755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.026193294674158096, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.020749714225530624, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.01777377724647522, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.015872467309236526, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.015392492525279522, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.010369285941123962, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.008192487061023712, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.007830379530787468, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0071011558175086975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0067847780883312225, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005375199019908905, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.005263809114694595, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.004673995077610016, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0033871058840304613, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07183457165956497, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07183457165956497, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.18187656998634338, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.16401663422584534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.15748439729213715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1381954699754715, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.08478371798992157, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.07817970216274261, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09834703803062439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.08959612995386124, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.08636126667261124, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07149762660264969, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.06716395914554596, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.050061628222465515, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04279681295156479, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04065693914890289, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04014552757143974, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.024978933855891228, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02077643573284149, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02028658054769039, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018049098551273346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.01769847795367241, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.012891045771539211, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.012542604468762875, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.012026885524392128, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007913454435765743, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09834703803062439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.09834703803062439, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20114021003246307, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18411734700202942, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17734678089618683, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.15587623417377472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09407412260770798, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08752242475748062, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10990390926599503, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09973054379224777, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09552879631519318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08071450144052505, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07618436217308044, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05655461549758911, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04808078706264496, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.045495886355638504, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04487326368689537, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028372928500175476, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.024088848382234573, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02355087175965309, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.021488631144165993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02109898254275322, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01538791973143816, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015737811103463173, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014559024944901466, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01146402582526207, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09973054379224777, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09973054379224777, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1615435779094696, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1510990411043167, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.147470623254776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13363149762153625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07620374858379364, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07240308076143265, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08567160367965698, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07867702841758728, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07701721787452698, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06776200234889984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0648547038435936, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.043780773878097534, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03776659816503525, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.036657754331827164, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03639177605509758, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02194630727171898, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.019146256148815155, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01888614147901535, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01747848093509674, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.017313573509454727, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011769447475671768, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012006647884845734, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01139227207750082, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008484793826937675, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08567160367965698, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08567160367965698, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2199482023715973, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2059265673160553, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2012079358100891, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.18231934309005737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10378582775592804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.09874337166547775, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11624991148710251, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10679999738931656, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10485115647315979, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0922982394695282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08823034167289734, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05940718948841095, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05113372579216957, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04976843297481537, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04944157972931862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029701199382543564, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02566341497004032, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02531723864376545, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023340266197919846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02313288114964962, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01573425717651844, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015599783509969711, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015260735526680946, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010466129519045353, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10679999738931656, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10679999738931656, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.10.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.22282181680202484, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20063631236553192, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.19194136559963226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.17067095637321472, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10285820811986923, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0943850725889206, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12192880362272263, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1101856380701065, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10550225526094437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08858799189329147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0840606763958931, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.062303055077791214, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0530000776052475, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04971032217144966, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04890334978699684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.031451936811208725, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.026396576315164566, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.0260041281580925, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02365221083164215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.023141710087656975, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017285369336605072, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017414353787899017, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016198594123125076, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012713558971881866, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10550225526094437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10550225526094437, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09330078214406967, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08332879096269608, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07814967632293701, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0684969574213028, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04334187135100365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0388159453868866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05299648270010948, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.048349205404520035, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04421592131257057, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0363282635807991, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03446647524833679, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.026969511061906815, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.023193582892417908, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0209471695125103, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.020395366474986076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013515369035303593, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.011052756570279598, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.010624554008245468, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.009691957384347916, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.009327017702162266, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007156200706958771, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007353521417826414, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006367296911776066, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0051621380262076855, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09330078214406967, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09330078214406967, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07731243968009949, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.06868825107812881, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06359194219112396, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.05557345598936081, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03559758886694908, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03142296150326729, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04482007026672363, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.040755439549684525, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.036394622176885605, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.029718544334173203, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.028345933184027672, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02276921644806862, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.019499341025948524, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.017160650342702866, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.016578279435634613, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0114034628495574, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.008967703208327293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.008538458496332169, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.007802006788551807, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.007412915118038654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.005979461595416069, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00593475392088294, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005157256964594126, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00396495359018445, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07731243968009949, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07731243968009949, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.19064106047153473, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17016121745109558, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1626461297273636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1415816992521286, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.08846668899059296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08069400489330292, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10413138568401337, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09390455484390259, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09011813253164291, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0735725685954094, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.06887976080179214, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05304688215255737, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04485451430082321, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04234720394015312, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.041746605187654495, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.026488419622182846, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.021676471456885338, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.021089322865009308, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.018630553036928177, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.018221303820610046, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013695335015654564, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013153990730643272, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01257509458810091, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008346368558704853, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10413138568401337, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10413138568401337, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.19749054312705994, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18188652396202087, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16994774341583252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.14993177354335785, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09356126189231873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08427468687295914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1220865473151207, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10790712386369705, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09474210441112518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08098356425762177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07847735285758972, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06325501948595047, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05227312445640564, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04557491093873978, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04390212520956993, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03174460679292679, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.024436330422759056, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023330973461270332, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.022022424265742302, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02096988633275032, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017100006341934204, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016984382644295692, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014670327305793762, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012302206829190254, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10790712386369705, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10790712386369705, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1638105809688568, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15280947089195251, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1488446742296219, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1347295045852661, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07733164727687836, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07329116761684418, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08735572546720505, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08015467971563339, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07821401953697205, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06855111569166183, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06554543972015381, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04473092779517174, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03857869282364845, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03731200844049454, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03701085224747658, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02245476469397545, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.01969129592180252, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.019404100254178047, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017974374815821648, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.017792480066418648, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012163403443992138, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012630532495677471, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011736325919628143, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009198920801281929, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08735572546720505, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08735572546720505, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2209901064634323, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20650038123130798, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2015320509672165, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1824181228876114, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10422412306070328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.09898479282855988, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1171758621931076, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10753214359283447, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10534410178661346, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09241363406181335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08824135363101959, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05967603251338005, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.051467135548591614, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04995451122522354, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04959459975361824, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02985280379652977, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02570977807044983, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02534610778093338, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023317595943808556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.023088593035936356, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01568824052810669, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015567135065793991, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015148214995861053, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010325977578759193, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10753214359283447, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10753214359283447, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.11.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.22299832105636597, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20134399831295013, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1929253488779068, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1715022623538971, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10321475565433502, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09489341825246811, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12196904420852661, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11032591015100479, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10576341301202774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08902935683727264, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08457271754741669, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06245239078998566, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0531376376748085, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04990561679005623, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.049115635454654694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03149287775158882, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.026490453630685806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02611009031534195, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023754725232720375, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.023259982466697693, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017200376838445663, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01745212823152542, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016111450269818306, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01274916622787714, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10576341301202774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10576341301202774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09802991151809692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08723436295986176, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08103140443563461, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07108286023139954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.045480936765670776, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.040235407650470734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05777227133512497, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05183590203523636, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.046497892588377, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.038127463310956955, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.036635078489780426, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.029541729018092155, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.024952415376901627, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.022046180441975594, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02131524309515953, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014891821891069412, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.011748610995709896, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011224261485040188, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01033828780055046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.009869817644357681, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008002704940736294, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008033301681280136, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006915607023984194, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005749009549617767, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09802991151809692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09802991151809692, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08277636766433716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07317529618740082, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06759133189916611, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.05906900018453598, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03809978812932968, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.0334138423204422, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04857315495610237, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04372473061084747, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03906198590993881, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03170033171772957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.030293956398963928, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.024747200310230255, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.020967941731214523, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.018420975655317307, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.017792467027902603, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.012418261729180813, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009710028767585754, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.00923417042940855, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008463548496365547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008039936423301697, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006556537002325058, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006547192111611366, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005615949630737305, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004509482067078352, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08277636766433716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08277636766433716, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1915896087884903, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17273643612861633, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1654171645641327, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.14515024423599243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.08968818187713623, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08217019587755203, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10586094111204147, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09524830430746078, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09119951725006104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07536399364471436, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07109162211418152, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05391785874962807, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04556313902139664, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04295801743865013, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04232301190495491, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.026964737102389336, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.021956777200102806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0213768370449543, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0190287996083498, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.018608596175909042, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013945437036454678, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013295442797243595, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.012846172787249088, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008349718526005745, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10586094111204147, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10586094111204147, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2125372588634491, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19249457120895386, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1849810928106308, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.159502774477005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09968618303537369, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09198221564292908, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11670177429914474, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1054505854845047, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10147389024496078, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08386556804180145, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07762584090232849, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06025543436408043, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.050928447395563126, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.048297375440597534, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.047670990228652954, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030293624848127365, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.0255715511739254, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.024999607354402542, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02247888781130314, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.022075315937399864, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016464265063405037, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01670403778553009, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015600305050611496, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012192497961223125, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1054505854845047, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1054505854845047, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.16808468103408813, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15633440017700195, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15219736099243164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13771167397499084, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07948943227529526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07522653788328171, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08993304520845413, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08226965367794037, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08041668683290482, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0702185332775116, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06720780581235886, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0461551733314991, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.039706725627183914, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03844921663403511, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03815435245633125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023231295868754387, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.020440207794308662, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02015121653676033, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018655555322766304, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018470466136932373, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012792713940143585, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.013306141830980778, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01238854881376028, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009902971796691418, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08993304520845413, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08993304520845413, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2293601930141449, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21381840109825134, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.208604633808136, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.18858790397644043, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10835326462984085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.1027679368853569, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12150643765926361, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11160553991794586, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10954038798809052, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09576153010129929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09142675995826721, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.062143903225660324, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05352200195193291, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05203825980424881, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05168174207210541, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03109649010002613, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.026950594037771225, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026575816795229912, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024418670684099197, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024196434766054153, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016509925946593285, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016553429886698723, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016014792025089264, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011300353333353996, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09576153010129929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09576153010129929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.12.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.22663776576519012, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20384983718395233, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1939254254102707, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1726718246936798, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10464423894882202, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09529642015695572, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12543939054012299, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11372047662734985, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1074644923210144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09010212123394012, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08563912659883499, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06398431211709976, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05471197888255119, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.050656188279390335, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.049667246639728546, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.032306328415870667, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02701473981142044, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.026513995602726936, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02423606626689434, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.023595720529556274, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017707960680127144, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018067913129925728, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016347024589776993, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013238576240837574, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1074644923210144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1074644923210144, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09980960190296173, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.089895099401474, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0851714164018631, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07482054084539413, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04645015299320221, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04220283031463623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.055607493966817856, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.050800103694200516, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04729871451854706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.039207253605127335, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03710481524467468, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.02824312448501587, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.024279151111841202, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02233566902577877, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.021857932209968567, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014138366095721722, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.011596470139920712, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011212524957954884, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010149307548999786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.00983567163348198, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007408576551824808, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00739733362570405, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006700871046632528, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004955714102834463, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09980960190296173, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09980960190296173, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08181896805763245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0736326277256012, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0691860243678093, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06077088043093681, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03798071667551994, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.034141525626182556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.046539995819330215, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04237806797027588, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03869209438562393, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.032048299908638, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.030481284484267235, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02363036572933197, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02028013952076435, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.018252331763505936, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.017759747803211212, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011813986115157604, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009460005909204483, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009081817232072353, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008276669308543205, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.007944734767079353, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00617759395390749, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006085183937102556, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005463787820190191, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003983340226113796, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08181896805763245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08181896805763245, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2062043845653534, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18599744141101837, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17864637076854706, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15644387900829315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0961504802107811, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08867669105529785, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11149582266807556, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1012636199593544, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09781599789857864, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08077612519264221, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0758872851729393, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05676206946372986, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04832353815436363, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.0459870770573616, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04543597996234894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028340427204966545, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02339620143175125, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02283668890595436, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02022327482700348, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.019832614809274673, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014540617354214191, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013933354057371616, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013518712483346462, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008573848754167557, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1012636199593544, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1012636199593544, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.23235836625099182, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.21325045824050903, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.20618818700313568, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.184159055352211, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10893181711435318, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.10175688564777374, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.12552203238010406, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.11433414369821548, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11040181666612625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09401708841323853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0898727998137474, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06453493982553482, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0550842247903347, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05262036249041557, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.05204655975103378, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.032378870993852615, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.027794957160949707, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.027275757864117622, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02487603947520256, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.024509545415639877, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.017510268837213516, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017991838976740837, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.016715042293071747, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013079501688480377, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09401708841323853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09401708841323853, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.16984876990318298, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15800741314888, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15384408831596375, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13909417390823364, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08029582351446152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07594688981771469, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09078402072191238, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08326195180416107, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.0812884047627449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07095809280872345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06784119457006454, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.046540312469005585, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.040123436599969864, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03878282010555267, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.038469940423965454, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.023343995213508606, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02049666829407215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.020200371742248535, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.018676243722438812, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.018479617312550545, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012628846801817417, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01320735178887844, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.012181195430457592, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009667291305959225, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09078402072191238, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09078402072191238, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23193083703517914, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2162550389766693, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21096187829971313, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19056126475334167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10954160988330841, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10387655347585678, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12292531132698059, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11292991042137146, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11079493910074234, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09676934033632278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0922933965921402, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06278974562883377, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05410357564687729, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05257568135857582, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05221423879265785, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.031417056918144226, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.027162501588463783, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02678314782679081, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024586794897913933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02435668371617794, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016624752432107925, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016588406637310982, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016100512817502022, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01119694672524929, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09676934033632278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09676934033632278, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.13.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.23770594596862793, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.21348024904727936, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.20385326445102692, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.18107794225215912, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10995150357484818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.10037864744663239, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.13043051958084106, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11800409853458405, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.11285997927188873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09429550915956497, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0894368588924408, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06681559234857559, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.0567573606967926, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.053073007613420486, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.05217595770955086, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03371238708496094, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.028043605387210846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.0276138037443161, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.025011790916323662, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.024445803835988045, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01844513788819313, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01835821568965912, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.017235424369573593, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013201018795371056, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09429550915956497, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.09429550915956497, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10408695787191391, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09398383647203445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08845943957567215, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07804245501756668, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04849272966384888, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04381970316171646, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05938756838440895, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.053980231285095215, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04941071569919586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04121951013803482, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.039307851344347, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.030207855626940727, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02584938518702984, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02339775115251541, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.022785408422350883, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015133918263018131, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.01228038128465414, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01182789821177721, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01085105910897255, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010458861477673054, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.007978380657732487, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008072384633123875, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007079673931002617, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005587152671068907, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10408695787191391, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10408695787191391, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0826813206076622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07490549981594086, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06923973560333252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06124766170978546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03835553303360939, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03400597721338272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.049151089042425156, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04462873563170433, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03908064588904381, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03281548619270325, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03150974214076996, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.024917298927903175, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.021334320306777954, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.018482407554984093, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.017759069800376892, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01245209202170372, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009628110565245152, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009130015969276428, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008539247326552868, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008065314963459969, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006519776303321123, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006406618747860193, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005540927872061729, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004214265383780003, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0826813206076622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0826813206076622, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.21201612055301666, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1919456571340561, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18425562977790833, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.16165055334568024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09903179109096527, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09124863147735596, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11634505540132523, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10478601604700089, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10067714750766754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08369545638561249, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07891446352005005, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05946458876132965, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05013125762343407, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04748360440135002, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.046823542565107346, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029739320278167725, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02431858889758587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.023730063810944557, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021197007969021797, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.020776286721229553, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015378742478787899, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014795799739658833, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0141280023381114, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009463399648666382, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10478601604700089, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10478601604700089, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.23672248423099518, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20690369606018066, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.19458633661270142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.17241983115673065, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.11150586605072021, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09879007935523987, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1337243616580963, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.12070535868406296, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.11348547786474228, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09093014895915985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08721999824047089, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06886361539363861, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05826255679130554, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.05404101312160492, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.053026922047138214, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.034538231790065765, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02862183190882206, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02772008627653122, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.024624958634376526, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023954417556524277, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.01849258504807949, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.018884941935539246, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.017142200842499733, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013611555099487305, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09093014895915985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09093014895915985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.160923033952713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.15010537207126617, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14595890045166016, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.13222207129001617, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07625473290681839, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07215609401464462, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08660933375358582, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07943921536207199, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.077175572514534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0676167830824852, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06470952183008194, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.044447995722293854, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.038358453661203384, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03693149611353874, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.0365874208509922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.022307196632027626, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.019647642970085144, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.019351720809936523, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017979541793465614, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.017769409343600273, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.012115479446947575, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012858567759394646, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011625303886830807, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009575356729328632, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08660933375358582, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08660933375358582, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.22682146728038788, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21209804713726044, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20699961483478546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.18749386072158813, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10744821280241013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10206288844347, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12088500708341599, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11090158671140671, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.1085849180817604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09536827355623245, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0911894291639328, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.061940740793943405, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05323639139533043, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05166936293244362, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.0513005368411541, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.031026557087898254, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.026876050978899002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026507414877414703, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024458695203065872, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024227555841207504, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016643738374114037, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01667986996471882, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01610209420323372, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011579313315451145, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10744821280241013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10744821280241013, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.14.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2260216772556305, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.20202365517616272, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1922103315591812, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.17142651975154877, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10430212318897247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0947185680270195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12572796642780304, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11294461786746979, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10723017901182175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08952469378709793, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.0855182558298111, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06439787149429321, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05459387227892876, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.0506608709692955, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04966191574931145, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.032755136489868164, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.0272659994661808, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.026777852326631546, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.024481168016791344, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.023847457021474838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.018305467441678047, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.018542926758527756, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016842931509017944, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013907692395150661, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10723017901182175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10723017901182175, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11878785490989685, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10831653326749802, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10264697670936584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09131727367639542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05550293251872063, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.050669603049755096, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.0677463710308075, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.061296943575143814, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.0564224049448967, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.047871049493551254, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04574552923440933, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.034504931420087814, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.029443617910146713, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.026841893792152405, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.026200314983725548, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.017371129244565964, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014204144477844238, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013721690513193607, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0127099072560668, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01230275072157383, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.009250183589756489, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.009427065961062908, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.00827336311340332, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006697207689285278, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10831653326749802, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10831653326749802, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08735824376344681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07964354753494263, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07350058853626251, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06559082120656967, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.040501344949007034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03590689226984978, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05270557105541229, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.047554343938827515, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04124514013528824, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03511903062462807, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03403828293085098, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.026793505996465683, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02279145084321499, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.0195352490991354, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01868329755961895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013446486555039883, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010205226950347424, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009636876173317432, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009146155789494514, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008604675531387329, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006997808814048767, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006867791526019573, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005831826012581587, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004531131125986576, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08735824376344681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08735824376344681, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.23192265629768372, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21403294801712036, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20676229894161224, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.18557363748550415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.109551802277565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.10248295217752457, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.127008318901062, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11521732062101364, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11114659905433655, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09545248001813889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0909346416592598, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06480874121189117, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05514787882566452, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.052551351487636566, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05195385217666626, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03235313668847084, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02679523080587387, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.026223791763186455, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.0238584503531456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.023456202819943428, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016694949939846992, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016046930104494095, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.015648916363716125, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009985078126192093, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09545248001813889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09545248001813889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.22007915377616882, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20130477845668793, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1945509910583496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.16966082155704498, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.1030193641781807, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0959523618221283, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11887282133102417, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10779327154159546, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10443642735481262, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08800403773784637, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0815754309296608, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06101042032241821, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.051751017570495605, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.049563776701688766, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.049058008939027786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03054557926952839, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.025827687233686447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.025332164019346237, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02282831259071827, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.022489309310913086, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016251368448138237, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016213122755289078, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015525475144386292, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011234202422201633, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10779327154159546, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10779327154159546, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15709789097309113, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14718401432037354, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.14345018565654755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1302832067012787, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07454446703195572, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0708257332444191, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08435703814029694, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0773548036813736, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07534752786159515, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06650896370410919, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06377776712179184, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.043353185057640076, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03741933032870293, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0361456423997879, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.035846393555402756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.021811015903949738, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.019296826794743538, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01902688667178154, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.017746716737747192, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01756594143807888, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011992906220257282, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.012697002850472927, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.011578037403523922, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009550184942781925, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08435703814029694, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08435703814029694, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23051507771015167, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21663017570972443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21188323199748993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1923513114452362, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10913725197315216, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.1041065901517868, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12217807024717331, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11228106915950775, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11017904430627823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0974518209695816, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0932694673538208, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06238889694213867, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05376045033335686, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.052327074110507965, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.051987964659929276, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.031198011711239815, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.026909366250038147, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02656651847064495, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02453216165304184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02431987226009369, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01646387204527855, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016230950132012367, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015971535816788673, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01073881983757019, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0974518209695816, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0974518209695816, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.15.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.22283393144607544, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19787771999835968, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18750740587711334, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1673346906900406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10228683054447174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09212175011634827, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1229967549443245, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.1114330142736435, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1054002195596695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0871577113866806, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08316478878259659, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06277437508106232, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05356758087873459, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.049424055963754654, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04839930683374405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.031727779656648636, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.026202460750937462, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.02569575235247612, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.023286674171686172, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.022631904110312462, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017475729808211327, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.0173800066113472, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.01612875424325466, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012532023712992668, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1054002195596695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.1054002195596695, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11626879125833511, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10678738355636597, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.10192713141441345, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09091027081012726, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.054289381951093674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.05002737417817116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06464110314846039, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05909822881221771, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05507528781890869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.047108426690101624, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04493408650159836, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03284268453717232, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02825096994638443, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02608649805188179, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.025560231879353523, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016454515978693962, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.013550598174333572, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.013142666779458523, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012122577987611294, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011784753762185574, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008638021536171436, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00863583479076624, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.00785351637750864, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0058101993054151535, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10678738355636597, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10678738355636597, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08938144892454147, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.0820763111114502, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07663410902023315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06856898963451385, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04141712933778763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.037315692752599716, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05200072005391121, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0477200485765934, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.0420687273144722, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03621290251612663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.034806977957487106, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02633093297481537, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.022748805582523346, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.01989980787038803, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.019177677109837532, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013157698325812817, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010295676067471504, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.00980797316879034, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009250604547560215, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008793891407549381, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0068405549973249435, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006717247422784567, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0058608632534742355, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004312470089644194, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08938144892454147, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08938144892454147, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.23364387452602386, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21677814424037933, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.21049043536186218, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1897086352109909, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.1101544126868248, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.10383974015712738, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12656652927398682, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11489041149616241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.11154637485742569, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09681771695613861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09242936223745346, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06467103958129883, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05489727482199669, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.05272652208805084, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.052221618592739105, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03231625631451607, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.026778876781463623, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.026285458356142044, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.024019740521907806, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.023668836802244186, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016582058742642403, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.01578979752957821, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.015592928975820541, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009660257026553154, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09681771695613861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09681771695613861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.22157754004001617, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.20380309224128723, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1977674514055252, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.17608819901943207, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10373254120349884, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09703688323497772, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11835452914237976, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10782334208488464, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10509919375181198, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.09033362567424774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08517353981733322, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.06090109050273895, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05216420069336891, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.050218366086483, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.049747075885534286, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.030748577788472176, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.026604441925883293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.026108121499419212, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.023986276239156723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.023704852908849716, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016838237643241882, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017271852120757103, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.016212644055485725, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01271419320255518, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10782334208488464, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10782334208488464, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.14749759435653687, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1384555995464325, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13489162921905518, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12275389581918716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06995505094528198, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06649062782526016, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07947596907615662, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07283113896846771, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07070804387331009, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06263121962547302, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.06017787754535675, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.04078143835067749, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03518160805106163, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03388577699661255, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03357100486755371, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.020505733788013458, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018037356436252594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.017776668071746826, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01661866158246994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.016430145129561424, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01120226364582777, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011809190735220909, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010775389149785042, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00880994088947773, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07947596907615662, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07947596907615662, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.22145716845989227, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20860296487808228, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20404331386089325, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.18556183576583862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10479769110679626, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10012473165988922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11753354221582413, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10799366235733032, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10575126111507416, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09397052228450775, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09007571637630463, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.060044821351766586, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.051708806306123734, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.050273749977350235, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.049925707280635834, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030042480677366257, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02588989958167076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.025561776012182236, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023693740367889404, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.023480068892240524, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015920553356409073, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01569034904241562, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015429742634296417, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010450569912791252, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10799366235733032, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10799366235733032, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.16.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2106926590204239, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18506966531276703, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.17396634817123413, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.15470370650291443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09634341299533844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08572817593812943, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1173277348279953, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10632435977458954, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09970097988843918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.08104009926319122, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07758504152297974, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05998517945408821, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05112714692950249, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04657380282878876, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.045435573905706406, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030337858945131302, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02471041865646839, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.0241607166826725, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021762533113360405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.021035945042967796, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0167070422321558, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01651672087609768, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.015236491337418556, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011852159164845943, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10632435977458954, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10632435977458954, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09399890899658203, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08649991452693939, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08190267533063889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07359857112169266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04383453354239464, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.040094245225191116, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05364542454481125, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.048998478800058365, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04455535113811493, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03842935711145401, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03698606789112091, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.027280230075120926, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02342948690056801, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02110830880701542, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02053413726389408, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013649229891598225, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.011039318516850471, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01063980907201767, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.00997849926352501, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.009613258764147758, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00716960895806551, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007223759312182665, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.0063393511809408665, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.004917453974485397, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09399890899658203, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09399890899658203, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07903298735618591, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07237327843904495, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06715082377195358, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.0604756698012352, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.036541152745485306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03272033855319023, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.04689214378595352, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.042777325958013535, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.037233397364616394, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03207745403051376, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03104611672461033, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02371497079730034, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02037697285413742, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.017604945227503777, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.016879701986908913, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.011853835545480251, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009168657474219799, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.008691389113664627, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008278822526335716, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.007822408340871334, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006175146903842688, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006104069296270609, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005205883644521236, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.003990391734987497, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07903298735618591, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.07903298735618591, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.19467245042324066, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17728054523468018, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16866599023342133, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15199026465415955, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09023594111204147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08264200389385223, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.10972297191619873, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09896137565374374, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.0922345295548439, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07870547473430634, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07569541782140732, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05592308193445206, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04723445326089859, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04324499890208244, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04227631166577339, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.027921954169869423, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.022155629470944405, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.021400103345513344, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019805829972028732, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.01915072649717331, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014346057549118996, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013758646324276924, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01269494742155075, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008569523692131042, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09896137565374374, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.09896137565374374, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.21698115766048431, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.19559310376644135, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18742439150810242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1663638949394226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10144507139921188, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09222492575645447, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1192353293299675, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10787854343652725, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10342013835906982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08665141463279724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.08150199800729752, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.061555713415145874, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.052162762731313705, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0492156520485878, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04850543662905693, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.03087901510298252, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.026181917637586594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02550381049513817, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02332143671810627, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.02286456897854805, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016734156757593155, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017278866842389107, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01578724943101406, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012725437059998512, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10787854343652725, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10787854343652725, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.15142770111560822, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.14216464757919312, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13854464888572693, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1261676549911499, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.07160019129514694, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0681101456284523, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08113756030797958, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07453475147485733, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.07238296419382095, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06423888355493546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0617034025490284, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.041533175855875015, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.035905640572309494, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03460007533431053, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03428944572806358, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02088388428092003, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.018307600170373917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01803925633430481, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.016864659264683723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.016677478328347206, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.011356067843735218, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011833460070192814, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010926743969321251, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008672010153532028, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08113756030797958, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.08113756030797958, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.22551828622817993, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21266992390155792, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20813216269016266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.18959316611289978, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10660089552402496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10200698673725128, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11950362473726273, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10980546474456787, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10758062452077866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09583120793104172, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09212027490139008, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06115224212408066, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.052665743976831436, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.051227372139692307, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.050896331667900085, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030758269131183624, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.026600567623972893, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02627384662628174, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024442194029688835, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024228712543845177, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016560014337301254, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01641734503209591, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016086436808109283, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.011313512921333313, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10758062452077866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10660089552402496, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.17.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.22287116944789886, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.19422514736652374, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.18219222128391266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.16071215271949768, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.10193702578544617, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.09022490680217743, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.12472308427095413, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.11234835535287857, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10558076202869415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.0847737044095993, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.08078224956989288, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06371373683214188, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.05411658063530922, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04942324757575989, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04825514554977417, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.03227392956614494, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.026440590620040894, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.025870634242892265, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02317311055958271, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.022415217012166977, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0178786963224411, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017907235771417618, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.016339963302016258, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013150321319699287, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10558076202869415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10558076202869415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11004544049501419, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10188671201467514, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09695694595575333, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0872419998049736, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05151214078068733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04759058356285095, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06264049559831619, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05694934353232384, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.0522712841629982, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04540835693478584, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.043644994497299194, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03196116164326668, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027351493015885353, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02491777017712593, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02431626059114933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016050541773438454, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.013180910609662533, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012768021784722805, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011991236358880997, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011623343452811241, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008564096875488758, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008759462274610996, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007679860107600689, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.006241159047931433, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10188671201467514, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10188671201467514, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08845453709363937, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08071233332157135, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0744413286447525, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06723649054765701, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0408356748521328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.036218538880348206, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05295991152524948, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0484284944832325, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.041672684252262115, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03586976230144501, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.034797847270965576, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.026811176910996437, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023114290088415146, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.019679248332977295, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.018786689266562462, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013418135233223438, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.01027146726846695, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009680073708295822, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009290607646107674, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008728498592972755, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.00698435865342617, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.00692614633589983, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0058087753131985664, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004539775662124157, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08845453709363937, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08845453709363937, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2135317027568817, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19525602459907532, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18652667105197906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.16919206082820892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09993132203817368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.0919542908668518, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12039154022932053, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10821635276079178, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.1018606647849083, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0873752161860466, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.0842982605099678, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06155358627438545, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05177866667509079, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04790804535150528, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.046972423791885376, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.0307709202170372, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02448229491710663, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02375115267932415, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021908869966864586, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.021300887688994408, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015868542715907097, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015024722553789616, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.014189704321324825, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009365401230752468, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10821635276079178, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10821635276079178, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.195184126496315, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17242549359798431, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16483913362026215, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1385633945465088, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.0905345231294632, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08240946382284164, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10511118918657303, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0952080562710762, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.0920867919921875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07295884937047958, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.06772064417600632, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05404902994632721, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0462232269346714, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.044081833213567734, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.043578971177339554, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02731975167989731, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02381243370473385, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02328793704509735, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020496651530265808, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.020162569358944893, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015204399824142456, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016064675524830818, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014525719918310642, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0123214740306139, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10511118918657303, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10511118918657303, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.14383916556835175, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13501441478729248, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13146214187145233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11968283355236053, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06799346953630447, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06459176540374756, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07711198180913925, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07097552716732025, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06873839348554611, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06095677614212036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0585012212395668, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03945010527968407, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03413919731974602, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03280460834503174, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.0324828214943409, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01976950094103813, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.01725163124501705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.016982465982437134, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.0158676877617836, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.015672050416469574, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010618750005960464, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011035492643713951, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010164710693061352, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007934082299470901, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07711198180913925, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07711198180913925, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.21496346592903137, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2026461809873581, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19816212356090546, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.18057405948638916, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10162883251905441, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.09710313379764557, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11422870308160782, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.104871466755867, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10258214175701141, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09137226641178131, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08771158009767532, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05828772857785225, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05024675279855728, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.048771973699331284, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.048411011695861816, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029240520671010017, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.025170765817165375, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02484133467078209, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023085104301571846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.022870507091283798, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015597566030919552, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015332736074924469, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.01510314829647541, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010287312790751457, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.104871466755867, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.104871466755867, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.18.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2117636352777481, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.18292047083377838, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1707465648651123, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.14969530701637268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.0966239646077156, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08482600748538971, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11906596273183823, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10716640949249268, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.10029365867376328, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07952620089054108, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07583007961511612, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.06097719445824623, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.051754795014858246, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.046924710273742676, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.045719727873802185, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030960820615291595, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.025271916761994362, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.024685561656951904, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.02206326276063919, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.021290738135576248, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017245899885892868, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017363708466291428, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.015641184523701668, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012925333343446255, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10716640949249268, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10716640949249268, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11289068311452866, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10484391450881958, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09981712698936462, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.09039458632469177, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05285368859767914, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.048849593847990036, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06426257640123367, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05864251032471657, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05363518372178078, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04698631539940834, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.045296330004930496, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03273291885852814, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028077049180865288, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.025486741214990616, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.024845710024237633, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016406027600169182, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.013352666050195694, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012905758805572987, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012191196903586388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011788899078965187, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008692697621881962, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008725293911993504, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007769813295453787, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005997007712721825, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10484391450881958, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10484391450881958, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08667000383138657, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07995666563510895, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07365242391824722, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06678574532270432, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04013921692967415, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03566858172416687, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.052560530602931976, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04809226095676422, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04088282585144043, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03564365580677986, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03475883603096008, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02662033587694168, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02289612591266632, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.019343992695212364, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.018420401960611343, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013330507092177868, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010083579458296299, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009479201398789883, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00919469352811575, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.00859877374023199, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006955908611416817, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.0068160113878548145, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005712150130420923, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0043973857536911964, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08667000383138657, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08667000383138657, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.22990450263023376, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21217523515224457, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20428265631198883, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.18541011214256287, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10794433951377869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.10043083876371384, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1278136819601059, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11529205739498138, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10977959632873535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0951734334230423, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09152954816818237, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06529562175273895, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.055078182369470596, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.051711514592170715, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05087581276893616, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.032640065997838974, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02631707862019539, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02563386783003807, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02365552820265293, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02312185801565647, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01670987904071808, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015843020752072334, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.015147530473768711, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009655595757067204, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10794433951377869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10794433951377869, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20798200368881226, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.183251291513443, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1748390793800354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.14496120810508728, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09669956564903259, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08773794025182724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11374738812446594, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10195238143205643, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.0984940379858017, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07730884104967117, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.06977159529924393, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05864972248673439, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04970225319266319, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.047286100685596466, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.046723589301109314, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02970208041369915, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.025795336812734604, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.025236377492547035, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02221504971385002, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.021847186610102654, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.016708150506019592, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.017752232030034065, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015938619151711464, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.013915353454649448, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10195238143205643, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10195238143205643, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.14150118827819824, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13292571902275085, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12953200936317444, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11802823096513748, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06687583774328232, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06355974078178406, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07559831440448761, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06961944699287415, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06758387386798859, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05997871607542038, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.057565104216337204, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03860563784837723, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03344451263546944, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03220780938863754, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.031910598278045654, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01935834437608719, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.016870681196451187, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.016620682552456856, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015516333281993866, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.015333421528339386, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010378541424870491, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010690235532820225, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009965966455638409, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007594285532832146, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07559831440448761, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07559831440448761, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.211544468998909, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.19940395653247833, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.19500824809074402, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1778043657541275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.09995942562818527, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0955684632062912, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11214189231395721, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10315665602684021, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10092633962631226, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0899544432759285, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08624397963285446, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05715354532003403, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04942446947097778, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04798183590173721, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04764178395271301, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.028676534071564674, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.024818049743771553, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.024501672014594078, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.022794995456933975, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.022580653429031372, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015325256623327732, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015199915505945683, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.014845252968370914, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010295296087861061, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10315665602684021, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10315665602684021, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.19.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.2047233134508133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.1757838875055313, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.16252535581588745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.14120325446128845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09344078600406647, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.08084297925233841, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11661248654127121, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10538043081760406, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.0970468521118164, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07606491446495056, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07238107174634933, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.059869151562452316, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.051043957471847534, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.045533187687397, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04412989318370819, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.030544746667146683, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.024711381644010544, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.024014787748456, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.021486451849341393, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.020578667521476746, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.017206599935889244, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.017326965928077698, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.015439321286976337, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.013005062006413937, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10538043081760406, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10538043081760406, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11272255331277847, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1043560728430748, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09827665984630585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08938334882259369, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05263460427522659, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04796316474676132, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06601349264383316, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.059962958097457886, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05350359156727791, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04686446115374565, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04544011875987053, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.033619023859500885, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028727376833558083, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.025408083572983742, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02458089590072632, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016867581754922867, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.013376748189330101, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012826608493924141, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.0122523233294487, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011737610213458538, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008966890163719654, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008953485637903214, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007792794611304998, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0061717950738966465, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1043560728430748, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1043560728430748, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08740029484033585, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07984542846679688, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07208666205406189, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06554416567087173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.040307678282260895, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03482252359390259, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05511005222797394, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.050109945237636566, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04120762273669243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.035665396600961685, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03504412621259689, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027827568352222443, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023870177567005157, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.019477803260087967, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01828799955546856, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013941071927547455, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010195954702794552, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009429896250367165, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009277800098061562, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008526753634214401, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0072774263098835945, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007090172730386257, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005737186875194311, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004570336081087589, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08740029484033585, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08740029484033585, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2165144979953766, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.19705483317375183, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18804700672626495, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.17027907073497772, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10060455650091171, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09223771840333939, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12115447968244553, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10936464369297028, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10269990563392639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0878005176782608, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.08477942645549774, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06199033558368683, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.052254512906074524, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04820314049720764, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.047192711383104324, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03100656159222126, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02461308054625988, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.023834817111492157, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02199711464345455, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.02134746126830578, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0159166157245636, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015108620747923851, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.014127255417406559, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009365180507302284, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10269990563392639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10269990563392639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20676034688949585, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18108324706554413, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1726279854774475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.14304031431674957, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09645876288414001, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08747629821300507, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11177173256874084, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1015552505850792, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09840530157089233, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07596739381551743, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0686807781457901, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.057443082332611084, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04907875135540962, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04675234854221344, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04619361460208893, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028938205912709236, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02485112100839615, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.024259794503450394, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020908597856760025, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.020533552393317223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015710949897766113, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01631391979753971, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014972401782870293, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.012019743211567402, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1015552505850792, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1015552505850792, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.14366233348846436, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13544052839279175, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1323716640472412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1207776665687561, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06794668734073639, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06489280611276627, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0762069821357727, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.07028809189796448, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06861330568790436, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0611715242266655, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.05867013707756996, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.038883548229932785, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03374150022864342, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03268876671791077, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03243943303823471, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019476646557450294, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.01703648641705513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.016809048131108284, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01568553037941456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01553317904472351, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010388310067355633, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010641325265169144, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010040374472737312, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007448690477758646, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0762069821357727, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0762069821357727, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.22013846039772034, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.20802205801010132, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20390869677066803, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.18598559498786926, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10399086773395538, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.09966680407524109, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11608818918466568, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10683757066726685, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10490581393241882, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09378375858068466, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08992457389831543, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.059108927845954895, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05112244188785553, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.049830112606287, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04953339695930481, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029645884409546852, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02561648190021515, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02531229704618454, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02353980392217636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02334648370742798, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015773387625813484, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01542697474360466, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015348981134593487, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010196542367339134, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10683757066726685, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10683757066726685, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.20.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.20011070370674133, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.17093425989151, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.15766191482543945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.13728377223014832, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09067340195178986, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0782749354839325, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.1130591407418251, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10267166793346405, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.0946146696805954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07353027164936066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07024505734443665, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05796395614743233, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04942898824810982, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04394981265068054, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04255518689751625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029433416202664375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.023479361087083817, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.022805988788604736, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020219190046191216, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.019309191033244133, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016387352719902992, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01607642136514187, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014605812728404999, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011612831614911556, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10267166793346405, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10267166793346405, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10548993200063705, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0970480814576149, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09126096963882446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08295448124408722, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.049176234751939774, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04458179697394371, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06180449202656746, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05605512112379074, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.050064630806446075, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04353754222393036, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0422738678753376, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03150898963212967, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02680434286594391, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.023706866428256035, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.022924434393644333, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015813471749424934, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012425249442458153, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011888636276125908, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011315266601741314, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010826029814779758, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008402342908084393, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008240306749939919, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007279569748789072, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005588171072304249, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10548993200063705, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10548993200063705, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08691606670618057, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07917964458465576, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07172246277332306, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06529390811920166, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.040086936205625534, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03474005311727524, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05447695404291153, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04942741617560387, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04101654142141342, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03539029881358147, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.034812115132808685, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02765740640461445, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023594770580530167, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.01937257871031761, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01822633109986782, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013787279836833477, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010149363428354263, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009408706799149513, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009218428283929825, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.0084937559440732, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007174761034548283, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007034679874777794, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005679722409695387, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0045440588146448135, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08691606670618057, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08691606670618057, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.20617926120758057, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18592776358127594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1756441295146942, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15874823927879333, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09519960731267929, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08602175116539001, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11747536808252335, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.105809286236763, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.097579725086689, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08260997384786606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07985562086105347, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06030154600739479, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05053897574543953, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04560691490769386, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04437168315052986, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.030177108943462372, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.023321740329265594, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.022381018847227097, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020740695297718048, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.01992109604179859, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015475265681743622, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014577120542526245, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013307920657098293, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008946185000240803, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.105809286236763, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.105809286236763, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.21944473683834076, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1947801411151886, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.18638598918914795, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.15566514432430267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.10143902897834778, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.09271248430013657, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11667463183403015, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10723672807216644, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10337550193071365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.08229001611471176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07464104890823364, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05967286229133606, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.05151032283902168, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0488169901072979, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04817875474691391, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.029927819967269897, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02555393986403942, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.024931389838457108, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.02180221490561962, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.021376119926571846, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015949491411447525, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016285201534628868, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.015046939253807068, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011438664980232716, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10723672807216644, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10723672807216644, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.13742902874946594, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12975558638572693, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12697288393974304, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11590830236673355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06503321975469589, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.062208592891693115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07271428406238556, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06706953048706055, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.0656019002199173, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05860185995697975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.05626777559518814, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03712110221385956, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03217088058590889, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.0312541164457798, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.031040050089359283, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.018605709075927734, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.016260050237178802, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.016058417037129402, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014979050494730473, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01484301220625639, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009962202981114388, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010102440603077412, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009662430733442307, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.007026773877441883, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07271428406238556, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07271428406238556, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.22818301618099213, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21588462591171265, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2118026614189148, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1932954043149948, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10787032544612885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10350858420133591, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11983319371938705, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11056957393884659, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.1087566614151001, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09736811369657516, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0934661477804184, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.061076998710632324, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05293501913547516, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.051712773740291595, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05142230913043022, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030542531982064247, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.026608331128954887, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.0263157207518816, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024473048746585846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024293072521686554, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016128109768033028, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016048166900873184, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015723591670393944, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010673325508832932, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10787032544612885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10787032544612885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.21.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.20026762783527374, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.17018042504787445, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.155817449092865, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.13807514309883118, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09036467969417572, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.07703866064548492, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11411138623952866, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10375276952981949, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09450634568929672, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07361827790737152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07131215929985046, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05813978239893913, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04994267597794533, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04385567829012871, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.042289599776268005, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02956283465027809, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02352530136704445, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.022764595225453377, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020360738039016724, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.019349120557308197, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016460102051496506, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01631876267492771, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014539939351379871, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011841821484267712, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10375276952981949, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10375276952981949, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1041201800107956, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09525725990533829, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08910058438777924, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08098932355642319, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04837685823440552, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.0435321219265461, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06132883578538895, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.055750347673892975, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04947127401828766, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.042706843465566635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.041577961295843124, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03126610815525055, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.026688408106565475, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.023369276896119118, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.022521058097481728, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015722211450338364, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012310007587075233, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01174822635948658, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011200989596545696, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010674678720533848, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008378245867788792, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008292541839182377, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.00719481660053134, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005701652728021145, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1041201800107956, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1041201800107956, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09034311026334763, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08207866549491882, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07429259270429611, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06753396987915039, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.041698794811964035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.036011952906847, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05670125409960747, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.051428526639938354, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.042703934013843536, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03670384734869003, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.035996805876493454, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.028781915083527565, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024612193927168846, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02017027884721756, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.018984727561473846, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014410462230443954, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010599913075566292, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009832982905209064, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009632372297346592, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008877051062881947, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007549842353910208, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007403850555419922, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005994435865432024, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004870792385190725, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09034311026334763, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09034311026334763, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1931714564561844, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17288753390312195, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16217078268527985, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.14641371369361877, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.08877456933259964, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.07922990620136261, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1112443059682846, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1001524105668068, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09109294414520264, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07650697976350784, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07401168346405029, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05685241147875786, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04774349927902222, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04254310205578804, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04122089222073555, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028395362198352814, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.0217595212161541, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02076582796871662, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.019255537539720535, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.018392011523246765, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01456028874963522, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013736434280872345, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.012368956580758095, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008410409092903137, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1001524105668068, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1001524105668068, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.204437255859375, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1740264892578125, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16282430291175842, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.12863090634346008, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09509365260601044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08381789177656174, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11273816972970963, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1025802344083786, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09714414179325104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06946771591901779, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.06462958455085754, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.057669997215270996, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.049501772969961166, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04607012867927551, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04522424936294556, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.0290429275482893, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.024523412808775902, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023726260289549828, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01966726779937744, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.019075149670243263, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015881674364209175, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.016300683841109276, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014813543297350407, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.011937793344259262, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1025802344083786, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.1025802344083786, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.14303047955036163, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13504795730113983, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1321260780096054, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.12059232592582703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06762046366930008, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06467396765947342, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0755343809723854, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06977251917123795, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06822541356086731, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.060919441282749176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.05844131112098694, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03849983215332031, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.033411841839551926, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.032447829842567444, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03222043067216873, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019259870052337646, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.016774440184235573, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.016559438779950142, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015422898344695568, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.015281240455806255, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01017833687365055, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.0102683762088418, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009856902994215488, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006952141411602497, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0755343809723854, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0755343809723854, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.22994427382946014, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21748536825180054, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21322542428970337, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19466879963874817, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10862618684768677, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10423234850168228, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.1206212267279625, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11142954230308533, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10955782234668732, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09800374507904053, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09396487474441528, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06145564839243889, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05326664447784424, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05200977995991707, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05171271041035652, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03067878447473049, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02661127783358097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02630702778697014, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02444138005375862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024254992604255676, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01604415476322174, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015835357829928398, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015624839812517166, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010237723588943481, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09800374507904053, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09800374507904053, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.22.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.20017245411872864, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16982685029506683, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.15542638301849365, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.13920477032661438, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.09025708585977554, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.07688881456851959, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11443367600440979, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10363642126321793, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09448802471160889, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07389418035745621, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07205309718847275, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05821980535984039, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04978271946310997, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04374002665281296, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.042192887514829636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029567085206508636, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.023355308920145035, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.022601298987865448, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020235750824213028, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.019243333488702774, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016492219641804695, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016070110723376274, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014570615254342556, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011541414074599743, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10363642126321793, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10363642126321793, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10261794179677963, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09437272697687149, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0886002704501152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0805286392569542, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04773835465312004, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.043212324380874634, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06010574847459793, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.0546131357550621, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04863091558218002, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04224307835102081, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04104764759540558, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03060910664498806, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02610079199075699, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.023015202954411507, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.022234829142689705, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015339579433202744, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012038091197609901, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011497260071337223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01094902865588665, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01045777928084135, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00811523012816906, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007969340309500694, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007003990933299065, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0053568435832858086, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10261794179677963, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10261794179677963, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0882444754242897, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08009736984968185, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0733114629983902, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06658486276865005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04066215828061104, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.035557955503463745, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.053832605481147766, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04894855618476868, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04161929339170456, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.035740263760089874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03490148484706879, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027268769219517708, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02335476689040661, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.019610118120908737, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01861542835831642, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01362538244575262, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.01021573320031166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009561820887029171, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009241525083780289, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008613579906523228, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007100740913301706, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006936382036656141, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.0057835932821035385, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004487219266593456, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0882444754242897, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0882444754242897, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.20266474783420563, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18444469571113586, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17574572563171387, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15913139283657074, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09389609098434448, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08585764467716217, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11298375576734543, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10266910493373871, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09587512910366058, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08202647417783737, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07891888171434402, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.057606182992458344, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04899197444319725, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.044949378818273544, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04395443946123123, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.028753871098160744, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.022941049188375473, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.022148966789245605, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020507656037807465, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.01985391601920128, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.014758161269128323, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014099330641329288, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013090983964502811, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008643043227493763, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10266910493373871, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10266910493373871, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20556776225566864, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17685893177986145, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16598695516586304, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.126765638589859, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09483207762241364, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08385322988033295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11162253469228745, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10257311165332794, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09733230620622635, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07141486555337906, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.060679130256175995, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.056956980377435684, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04912630468606949, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0456460602581501, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.044794004410505295, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.028491012752056122, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.0237614493817091, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.022952022030949593, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01915036514401436, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.018550433218479156, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015010076574981213, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015150666236877441, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.013884888961911201, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010322392918169498, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10257311165332794, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10257311165332794, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.14171218872070312, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.13376447558403015, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.13087806105613708, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11945866048336029, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06698031723499298, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06405070424079895, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07490738481283188, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06908243894577026, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06759241223335266, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.06032579392194748, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.057957615703344345, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03816644847393036, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03311179205775261, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03216532990336418, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.031938809901475906, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.019154837355017662, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.016682442277669907, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01646902970969677, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.015346331521868706, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.015206897631287575, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010264089331030846, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010283239185810089, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009955840185284615, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00706057483330369, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07490738481283188, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07490738481283188, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.2319316267967224, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21917498111724854, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21486160159111023, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1960885226726532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10958817601203918, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10507747530937195, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12181384861469269, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11242086440324783, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11053212732076645, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09880739450454712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09476208686828613, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06212833523750305, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05379181727766991, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.052494414150714874, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.052196696400642395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.031079087406396866, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.026937535032629967, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026629894971847534, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024745136499404907, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024550922214984894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016440702602267265, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016137853264808655, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.0160088911652565, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01058194413781166, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09880739450454712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09880739450454712, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.23.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.19854772090911865, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16789206862449646, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.15244652330875397, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.13805222511291504, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.08929964154958725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.0751802921295166, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11396383494138718, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10422561317682266, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09380785375833511, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07362648099660873, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07233534753322601, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.058460380882024765, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.050167832523584366, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04333687946200371, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.041574325412511826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029606174677610397, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.023236267268657684, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.022389117628335953, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020268388092517853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.019143907353281975, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.016261711716651917, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.016229311004281044, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014034658670425415, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011683551594614983, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10422561317682266, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10422561317682266, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.109347864985466, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10065251588821411, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09458089619874954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08608713001012802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05097731202840805, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04613566771149635, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06391580402851105, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05820031464099884, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05188501253724098, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04515644907951355, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0439036600291729, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.0325445793569088, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027874795719981194, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.024601679295301437, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.023779362440109253, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016302352771162987, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012910880148410797, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01233670860528946, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01176932267844677, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011254358105361462, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008579166606068611, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008605028502643108, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007417601533234119, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005870864260941744, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10065251588821411, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10065251588821411, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09436715394258499, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08630906790494919, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.078782819211483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07173729687929153, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0436050146818161, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.038154248148202896, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05851959064602852, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0531134270131588, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.044560931622982025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03863438218832016, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03787408396601677, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.02969840168952942, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.025392279028892517, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.021037215366959572, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.019901428371667862, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014826965518295765, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.011008355766534805, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.010260489769279957, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.010023020207881927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.00929166842252016, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007749289274215698, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007558248471468687, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006239541340619326, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004890690091997385, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09436715394258499, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09436715394258499, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.21251492202281952, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1950274258852005, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18692977726459503, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.16968737542629242, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09893981367349625, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09151487797498703, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11733341962099075, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10678938031196594, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10076618194580078, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08708816766738892, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.08374691754579544, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.059600524604320526, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05098893120884895, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.0473644956946373, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04650877043604851, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029761768877506256, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.024146096780896187, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.0234279315918684, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021708065643906593, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.021118229255080223, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.01526755653321743, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014667516574263573, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013797837309539318, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008982496336102486, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10678938031196594, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10678938031196594, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.181105837225914, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1624220758676529, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.15321338176727295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.12508352100849152, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.08476496487855911, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.07681255787611008, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10222026705741882, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09348565340042114, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.08602164685726166, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.06813587993383408, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.06189034879207611, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05229721963405609, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.044805772602558136, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04081801697611809, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.0398375503718853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.026111910119652748, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02121160179376602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.020468270406126976, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.017884636297822, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.017206091433763504, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.013749523088335991, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01360786147415638, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.012389685958623886, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009079969488084316, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10222026705741882, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10222026705741882, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.13464397192001343, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12703387439250946, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12425591051578522, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11338376998901367, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06361327320337296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.060809843242168427, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07110011577606201, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0656452402472496, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06419181078672409, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0572492815554142, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.054962314665317535, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.036247968673706055, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.031436532735824585, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.03052176907658577, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.0303095281124115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.018138302490115166, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.01579069159924984, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.0155867338180542, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014506598003208637, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01437445916235447, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009614817798137665, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009673194959759712, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009318814612925053, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006556133273988962, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07110011577606201, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07110011577606201, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23240888118743896, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21958723664283752, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.21527986228466034, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1965034306049347, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10976322740316391, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.1052481159567833, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12234994024038315, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11260653287172318, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11072342097759247, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09891840815544128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09502357244491577, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.062343768775463104, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05390359088778496, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05259942635893822, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.0522986575961113, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.03127066418528557, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.027027085423469543, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026717733591794968, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024822823703289032, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02462768368422985, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01667887717485428, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.016244204714894295, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.016252528876066208, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010710693895816803, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09891840815544128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09891840815544128, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.24.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.19474801421165466, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16383589804172516, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.1476297676563263, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.13500885665416718, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.08732227236032486, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.07262521237134933, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11359814554452896, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.103338323533535, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.09202820062637329, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07232198119163513, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07138802856206894, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05775058642029762, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04989909008145332, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.0425710566341877, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.04064704477787018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.0294386874884367, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.023113850504159927, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.022190453484654427, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.020322861149907112, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.019122619181871414, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.0164289940148592, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01655094139277935, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014152340590953827, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.012197641655802727, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.103338323533535, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.103338323533535, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.11268675327301025, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10402227938175201, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09795717895030975, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.0891358032822609, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05258631333708763, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04778072610497475, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06569045782089233, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05976095050573349, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05348770320415497, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.046650052070617676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04529489949345589, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03341082111001015, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.028583280742168427, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.0253288671374321, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.024510683491826057, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.016736872494220734, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.013210967183113098, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012636978179216385, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.012029586359858513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011525138281285763, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008773372508585453, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00867721438407898, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007612280081957579, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005797220394015312, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10402227938175201, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.10402227938175201, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09318677335977554, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.085525281727314, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07817680388689041, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07117079943418503, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04310886934399605, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.037885282188653946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05715176463127136, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.052307624369859695, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04401082545518875, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.038222286850214005, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03740549832582474, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.028935741633176804, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02492067590355873, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.020789658650755882, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01969672180712223, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014463488012552261, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.010804583318531513, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.01010989397764206, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009839808568358421, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009155051782727242, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007519710808992386, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007340305484831333, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.006100332364439964, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004693687427788973, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09318677335977554, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.09318677335977554, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.2104690670967102, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1937192976474762, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.18585534393787384, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.16850683093070984, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09799866378307343, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09082368016242981, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11670379340648651, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10570928454399109, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09975200146436691, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08644429594278336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.08307191729545593, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.0593915656208992, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05043710768222809, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04689471423625946, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.046057671308517456, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029679810628294945, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.023885414004325867, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02319132350385189, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.021486105397343636, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.020923305302858353, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015213584527373314, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014476899057626724, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013714677654206753, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008821799419820309, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10570928454399109, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10570928454399109, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.21131618320941925, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.18065045773983002, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17006634175777435, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.13735169172286987, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09873319417238235, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08728691190481186, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.11522502452135086, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10461338609457016, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.10080303251743317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07374746352434158, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.0660933181643486, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.058700110763311386, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.0502154678106308, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04750699922442436, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.046847790479660034, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.029503941535949707, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.02458917163312435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023848477751016617, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019564487040042877, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.019094238057732582, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.015729177743196487, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.015286698937416077, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014840465039014816, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010222152806818485, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10461338609457016, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.10461338609457016, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.13492578268051147, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1271890252828598, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12441036105155945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11349061131477356, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06381701678037643, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.060940466821193695, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07145054638385773, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06586802005767822, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06441561132669449, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.057396307587623596, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.055106356739997864, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.0365133173763752, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03158501535654068, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.030649542808532715, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03043495863676071, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01830364763736725, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.015913162380456924, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.0157080739736557, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014622301794588566, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.014485747553408146, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.00983370840549469, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009834226220846176, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009530472569167614, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006773621309548616, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07145054638385773, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07145054638385773, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.23118220269680023, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.21827466785907745, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.213943749666214, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19522330164909363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10926470905542374, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10469765216112137, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12133052200078964, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.1121155172586441, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.11024852097034454, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09840544313192368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.09425091743469238, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06174764037132263, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05360373109579086, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05229860544204712, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.051989905536174774, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030826903879642487, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.026753878220915794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.026434797793626785, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02451968751847744, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024326125159859657, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016059698536992073, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015898341313004494, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015618217177689075, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010237096808850765, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09840544313192368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09840544313192368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.25.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.19262972474098206, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16272173821926117, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.14650794863700867, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.13404089212417603, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.08616224676370621, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.07187763601541519, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11239319294691086, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10273754596710205, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.0909661054611206, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07199220359325409, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07098886370658875, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.057544272392988205, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04948646202683449, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04184875637292862, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.03987228125333786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.028981782495975494, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.022501014173030853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.021540340036153793, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.01982228271663189, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.018547281622886658, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01579386182129383, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015943823382258415, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.013350908644497395, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011405053548514843, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10273754596710205, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10273754596710205, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10602004081010818, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09731649607419968, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0908602774143219, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08271627128124237, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04933752119541168, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.044365059584379196, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06258929520845413, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05709189549088478, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05031472072005272, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04371816664934158, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04249003902077675, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03186914697289467, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027358222752809525, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.023815495893359184, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02291213907301426, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.01595437340438366, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012505189515650272, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011890851892530918, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01139635220170021, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010825378820300102, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008341971784830093, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008401419967412949, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007094335742294788, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005678575020283461, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10602004081010818, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10602004081010818, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08920645713806152, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08129163086414337, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07318461686372757, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06667166948318481, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.041108936071395874, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03542003035545349, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05658840760588646, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.051322173327207565, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04205534979701042, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.036353375762701035, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03573125600814819, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.028570789843797684, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02446242794394493, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.019851138815283775, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.018622588366270065, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014295660890638828, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.01038980484008789, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.00960033480077982, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009450339712202549, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008673107251524925, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0074965935200452805, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007237179204821587, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005910640116780996, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004658616613596678, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08920645713806152, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08920645713806152, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.20237797498703003, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.18336892127990723, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.17339754104614258, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15722611546516418, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09362316876649857, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08475496619939804, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11507157236337662, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.104034923017025, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09570968151092529, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.08154333382844925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07889692485332489, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.058749474585056305, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04963339492678642, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.044835641980171204, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04362819716334343, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029369771480560303, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.022896189242601395, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.021997269243001938, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02044215239584446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.019643094390630722, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015069146640598774, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014266444370150566, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01308119110763073, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.008698949590325356, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.104034923017025, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.104034923017025, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.20010818541049957, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17665597796440125, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.1690438836812973, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.13999399542808533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09353063255548477, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08481254428625107, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10637816786766052, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09788107126951218, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09534978121519089, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.0744243785738945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.06650958955287933, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05439021810889244, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.046893756836652756, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.044919703155756, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04444262385368347, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.027177702635526657, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.023180041462183, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.0226259995251894, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019336309283971786, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.018999876454472542, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014270223677158356, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.01424909196794033, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01362861879169941, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.009484673850238323, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10637816786766052, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10637816786766052, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.13602524995803833, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12813720107078552, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12532849609851837, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11425214260816574, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06431794166564941, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06141435354948044, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07184814661741257, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06640943139791489, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06492308527231216, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0577828548848629, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.055365532636642456, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03661766275763512, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03181540593504906, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.030863799154758453, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.030638325959444046, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01831008866429329, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.015950286760926247, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.015739604830741882, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014634826220571995, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01449417881667614, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009639700874686241, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009765956550836563, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009322765283286572, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.0065998793579638, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07184814661741257, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.07184814661741257, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.229375422000885, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2166188359260559, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.2122027426958084, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.19343282282352448, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10846932232379913, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.10386663675308228, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.12061044573783875, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.11139332503080368, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10944453626871109, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09759095311164856, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.0935068428516388, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.06142159923911095, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.05329549312591553, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.05197383463382721, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.05165475606918335, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.030696725472807884, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02668224647641182, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02636447176337242, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.024459917098283768, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.024257486686110497, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.016105875372886658, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.01600414514541626, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015663810074329376, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.01050096657127142, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09759095311164856, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09759095311164856, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.26.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.19128549098968506, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16199760138988495, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.14614030718803406, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.1335698962211609, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.08584345132112503, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.07178221642971039, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11177997291088104, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10196249186992645, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.0903993621468544, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07170116901397705, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.07059600949287415, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05713842064142227, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04915847256779671, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.04175008460879326, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.03982488438487053, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.029160549864172935, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02250462956726551, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.021571379154920578, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.019835984334349632, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.01862259767949581, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01636050082743168, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01597404107451439, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.014072852209210396, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011550725437700748, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10196249186992645, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10196249186992645, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10776379704475403, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09911785274744034, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.09361616522073746, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.08522148430347443, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.05012182891368866, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04574679210782051, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06210818141698837, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.056557562202215195, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.05115947872400284, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04445617273449898, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04310436174273491, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03166985511779785, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.027058450505137444, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02417452447116375, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.023445017635822296, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015859605744481087, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012640055269002914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.012146418914198875, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011517703533172607, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.011057311668992043, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008358554914593697, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.008306704461574554, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.007294598501175642, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005619602277874947, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10776379704475403, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10776379704475403, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0916210487484932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.08436310291290283, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.07896091043949127, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.07183130830526352, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.04259788990020752, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.03845452144742012, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05385322496294975, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04915348440408707, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.04337566718459129, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03776026889681816, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03667561709880829, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027296004816889763, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023494571447372437, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.02049998752772808, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01975853368639946, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013650286011397839, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.0106887798756361, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.010185410268604755, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.009735476225614548, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.009258848614990711, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0071454523131251335, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007075723726302385, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.00610284972935915, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.00467346329241991, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0916210487484932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0916210487484932, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.19143371284008026, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17597965896129608, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16887570917606354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15302608907222748, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.08920562267303467, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08254557102918625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1052696630358696, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.0963636264204979, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.090811587870121, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07848287373781204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07559210062026978, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.05359948053956032, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04601184278726578, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.042678095400333405, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04190657660365105, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.026721300557255745, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.021724794059991837, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02107761614024639, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.01952889934182167, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.019001424312591553, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.013697896152734756, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.013170884922146797, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.01245245710015297, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.007995456457138062, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1052696630358696, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.1052696630358696, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1940961480140686, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.17444494366645813, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.16752731800079346, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1408996284008026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09129304438829422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08410260826349258, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10465066134929657, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09578704088926315, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.09273039549589157, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07422825694084167, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.06849159300327301, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.05385094881057739, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04615595191717148, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04404468461871147, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04355230554938316, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02713838405907154, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.023184238001704216, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.02271011658012867, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.019853923469781876, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.01952631026506424, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014789016917347908, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.014894750900566578, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.01408845279365778, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.01068625133484602, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10465066134929657, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.10465066134929657, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.1311478614807129, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12330430746078491, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12046124041080475, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.10981164872646332, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06205355003476143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.05910288169980049, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06960754096508026, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06422664225101471, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06268146634101868, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05567000061273575, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.05334101989865303, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03552825748920441, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.030809083953499794, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.02981187403202057, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.029579022899270058, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.017783470451831818, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.015459357760846615, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.015241995453834534, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014176735654473305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.01402987726032734, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009438532404601574, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009546232409775257, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009114783257246017, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006547152064740658, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06960754096508026, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06960754096508026, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.217949777841568, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.2054276466369629, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.20115934312343597, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.18343420326709747, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.10308659076690674, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.09859943389892578, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.11487825959920883, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10599680244922638, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.10405081510543823, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.09264782071113586, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08876010775566101, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.058633070439100266, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.050742872059345245, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04941694065928459, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04909956455230713, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.029302477836608887, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.025408582761883736, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.02509799413383007, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.023282019421458244, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.023087354376912117, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.015489963814616203, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015319236554205418, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.015052445232868195, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.010129198431968689, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10599680244922638, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.10599680244922638, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.27.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.19023369252681732, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.16073864698410034, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.14483292400836945, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.13153532147407532, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.08551385998725891, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.07128018140792847, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.11214467138051987, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10137984901666641, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.08993667364120483, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.07073383033275604, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.06961683928966522, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05715448409318924, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.048732779920101166, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.041537992656230927, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.039679814130067825, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02901988849043846, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.02233327366411686, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.021415874361991882, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.019544508308172226, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.01835625246167183, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.01616150140762329, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01576482504606247, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.0138510437682271, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.011341901496052742, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10137984901666641, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.10137984901666641, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10134534537792206, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.0929790660738945, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.0859546884894371, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07827335596084595, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.047092266380786896, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.041902512311935425, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06191590428352356, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05595682933926582, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04816083237528801, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.04183771088719368, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.040955327451229095, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03161437809467316, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.026839029043912888, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02278183586895466, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.021741988137364388, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015823891386389732, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.012045287527143955, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.011377165094017982, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.011023358441889286, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010375570505857468, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008317502215504646, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00829974003136158, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006845749448984861, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005675540771335363, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10134534537792206, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.10134534537792206, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08355291932821274, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07601465284824371, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.0667213648557663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06086503341794014, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03835965692996979, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.0321226641535759, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05556371062994003, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05019907280802727, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03938312828540802, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03405030444264412, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03373899310827255, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.028017273172736168, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.023904088884592056, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.018594050779938698, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.017128251492977142, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014017073437571526, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009829631075263023, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.008922774344682693, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00899707991629839, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008081742562353611, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.0073089892975986, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007135083433240652, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005439243745058775, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004645156674087048, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08355291932821274, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08355291932821274, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.1980273723602295, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.1787605583667755, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.1679355800151825, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1521996557712555, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09149541705846786, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08215104788541794, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11439753323793411, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10322773456573486, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.0937391072511673, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.0796373188495636, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07721127569675446, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.058671966195106506, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.04939601942896843, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.04390355944633484, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04253027215600014, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.029397299513220787, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02259366773068905, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.021583672612905502, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020203184336423874, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.01930570974946022, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015133246779441833, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.014440969564020634, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.012847003526985645, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009104663506150246, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10322773456573486, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10322773456573486, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.2010820209980011, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1789107620716095, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.17135143280029297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.1483514904975891, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.09355556964874268, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.08548278361558914, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1074189767241478, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.09829447418451309, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.0953027680516243, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.07703997194766998, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.07232099026441574, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.055086392909288406, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.04724830389022827, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.04509446769952774, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.04457920044660568, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02755054458975792, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.023607075214385986, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.023072242736816406, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.020346535369753838, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.020003151148557663, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.014739971607923508, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.0150102274492383, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.014048323966562748, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010585511103272438, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1074189767241478, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.1074189767241478, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.12099146097898483, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.11356168985366821, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.11067085713148117, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1008676290512085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.057224731892347336, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.05436673387885094, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06453553587198257, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.059542540460824966, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.05783064290881157, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.051233138889074326, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.049101490527391434, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03297331929206848, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.028566621243953705, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.02750813215970993, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.02725888416171074, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.016501352190971375, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.014290731400251389, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.014066956005990505, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.013094471767544746, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.012936875224113464, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.008775830268859863, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.00888944324105978, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.008428490720689297, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006119296420365572, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1008676290512085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1008676290512085, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.19810961186885834, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.1864711493253708, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.18226175010204315, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.1660279780626297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.09371750056743622, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.08945915848016739, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10488709062337875, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.0967448502779007, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.09464003145694733, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.08408321440219879, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.08048176765441895, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.05359574034810066, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04633990302681923, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.04497521370649338, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.044655609875917435, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02683059684932232, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.02324426732957363, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.022937312722206116, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02129482291638851, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.02109222300350666, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.01428156066685915, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.014212322421371937, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.0138300321996212, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00959307886660099, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10488709062337875, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.10488709062337875, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.28.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.1779419481754303, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.15017500519752502, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.13642922043800354, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.12106598168611526, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.08040371537208557, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.06780757755041122, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10247592628002167, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09339166432619095, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.08433185517787933, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.06535261124372482, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.06328119337558746, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.05256339907646179, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04515223205089569, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.03920697048306465, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.03767243027687073, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.02683371677994728, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.021313827484846115, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.020578574389219284, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.018528331071138382, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.017550883814692497, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.015035945922136307, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.015161896124482155, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.013172418810427189, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01129378005862236, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10247592628002167, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.10247592628002167, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09914839267730713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.09102348238229752, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08441296964883804, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07686759531497955, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.045997049659490585, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.04107639193534851, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.060708433389663696, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05424810200929642, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.047053322196006775, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.040882401168346405, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.04004476219415665, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.03097616881132126, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.025976942852139473, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02220117673277855, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.021236464381217957, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015520607121288776, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.011685759760439396, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01106194220483303, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010683353990316391, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.010079084895551205, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008146699517965317, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007955263368785381, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006684534717351198, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005368157289922237, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09914839267730713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09914839267730713, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0830821767449379, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07681319862604141, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06985095143318176, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.06360248476266861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03853848949074745, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.0338052473962307, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05252518877387047, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.047559965401887894, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.039278607815504074, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03443964198231697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.033917807042598724, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.026727300137281418, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.022660870105028152, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.018637238070368767, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01756139099597931, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.013381576165556908, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009782289154827595, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.009108161553740501, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.00896923616528511, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008309854194521904, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.006963656283915043, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.006791425868868828, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005490519106388092, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004460722673684359, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0830821767449379, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.0830821767449379, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.22763502597808838, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.21027769148349762, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.20152904093265533, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.18355531990528107, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10674091428518295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.09898350387811661, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.12735624611377716, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.11555637419223785, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.10856599360704422, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.09457186609506607, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.09135246276855469, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06512699276208878, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.055389031767845154, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.051181718707084656, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.05015000328421593, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.032607853412628174, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.026182159781455994, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02538566291332245, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02368476614356041, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.023016396909952164, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.016771448776125908, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.016086915507912636, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.015041273087263107, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.010014641098678112, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10674091428518295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.10674091428518295, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.1597098857164383, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.13469375669956207, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.12617836892604828, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.10874171555042267, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.07474759221076965, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.06573887914419174, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.08754078298807144, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.07859817892313004, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.07635536044836044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05690407007932663, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.05348113179206848, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.04499341547489166, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.037794940173625946, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.036030516028404236, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.03560595586895943, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.022521940991282463, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.018781324848532677, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.018257534131407738, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.015251630917191505, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.014949599280953407, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.011889674700796604, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.011849241331219673, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.011275272816419601, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.00824002269655466, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.08754078298807144, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.08754078298807144, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.11998185515403748, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.11236770451068878, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.10938851535320282, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.09961897879838943, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.05668626353144646, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.05377371236681938, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0641046017408371, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.05913320556282997, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.05734250694513321, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05068519711494446, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.04861009120941162, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03274247422814369, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.0284064169973135, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.02729618363082409, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.027035018429160118, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01639077626168728, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.014263493940234184, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.014036159962415695, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.013075819239020348, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.012908641248941422, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.008745227940380573, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.009006517939269543, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.008383114822208881, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.006336080841720104, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.09961897879838943, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.09961897879838943, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.17298932373523712, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.16251924633979797, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.15868857502937317, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.14459636807441711, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.08239952474832535, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.07851318269968033, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09243740141391754, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.08528973907232285, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.08325766026973724, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.07386428862810135, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.07092628628015518, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.047703322023153305, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.04165762662887573, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.040381982922554016, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.04008082300424576, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.02403390407562256, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.022252874448895454, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.021986152976751328, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.02068621665239334, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.020517757162451744, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.013562876731157303, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.015444920398294926, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.013165823183953762, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.012430710718035698, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09243740141391754, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.09243740141391754, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.29.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.17608661949634552, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.15076200664043427, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.13912497460842133, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.12211452424526215, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.08006656169891357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.06905052810907364, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09970266371965408, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.09058055281639099, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.08323042094707489, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.06529980152845383, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.06213339790701866, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.050648126751184464, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.04352260008454323, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.038763727992773056, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.03756049647927284, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.025491897016763687, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.020663434639573097, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.020064616575837135, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.017853019759058952, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.017068563029170036, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.013777703046798706, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.014080089516937733, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.012189573608338833, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.01012296974658966, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09970266371965408, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.09970266371965408, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09597598761320114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08773127943277359, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07995614409446716, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07278919219970703, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.04455683380365372, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.038898173719644547, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.06050325557589531, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.054304491728544235, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.045553360134363174, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03937367722392082, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03873462602496147, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.030872099101543427, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02601701393723488, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02162657119333744, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.02046707272529602, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.015514389611780643, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.011511335149407387, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01076338067650795, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010507708415389061, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.009794274345040321, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.008201739750802517, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.00811509694904089, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006597194820642471, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.005611756816506386, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09597598761320114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09597598761320114, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08337612450122833, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07590749859809875, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06608553975820541, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.060228701680898666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.038297221064567566, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.031764522194862366, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05656501650810242, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.05081664025783539, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03933862969279289, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03398551791906357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03375278413295746, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.028706267476081848, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.024210378527641296, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.018552571535110474, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01699291355907917, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.014422212727367878, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009830562397837639, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.008846002630889416, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008994936011731625, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.007994581013917923, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007511588279157877, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007205389440059662, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005465866066515446, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004643453750759363, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08337612450122833, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08337612450122833, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.19928093254566193, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17895923554897308, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16671447455883026, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.1511838436126709, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.09190095961093903, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08160120993852615, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11751045286655426, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10544858872890472, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09432977437973022, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07971374690532684, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07771320641040802, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.06036141887307167, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.05051920935511589, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.044235702604055405, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.042596615850925446, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03034386970102787, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02293100208044052, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.021761003881692886, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.020489800721406937, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.019438279792666435, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015702540054917336, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015013039112091064, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013024675659835339, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.0096958689391613, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10544858872890472, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.10544858872890472, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.14545632898807526, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.1303013116121292, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.12525078654289246, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.10844063758850098, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.06793921440839767, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.06283493340015411, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07786040008068085, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.07094207406044006, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.06898888200521469, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.05602502450346947, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.05270205810666084, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.040030352771282196, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.034196194261312485, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.032828062772750854, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.03250030800700188, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.02005155198276043, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.017332006245851517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.01700649783015251, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.01502489298582077, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.014806457795202732, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.010825629346072674, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.011191791854798794, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.010387344285845757, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.008150888606905937, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07786040008068085, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.07786040008068085, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.12222793698310852, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.11469649523496628, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.11177745461463928, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.10186372697353363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.058170709758996964, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.05526959151029587, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06559427827596664, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06051512435078621, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.05877574533224106, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.0521075576543808, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.05007535219192505, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03385717421770096, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.02957494556903839, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.028529876843094826, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.028274714946746826, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.017100168392062187, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.015701403841376305, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.015497731044888496, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.014587713405489922, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.014441726729273796, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.009642881341278553, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.010910401120781898, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.009311766363680363, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.008746319450438023, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.10186372697353363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.10186372697353363, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.13531121611595154, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.12709590792655945, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.12405291199684143, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.11298157274723053, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.06434469670057297, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.06130281090736389, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0722503736615181, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.06661736965179443, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.06499843299388885, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.05765736103057861, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.05537857860326767, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.037272095680236816, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.03242669254541397, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.031408775597810745, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.03116806037724018, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01877564936876297, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.017110660672187805, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.016890965402126312, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01586253009736538, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.015715831890702248, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.010517810471355915, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.011653563007712364, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.010199932381510735, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.009170696139335632, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0722503736615181, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.0722503736615181, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.30.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.14865104854106903, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.12776009738445282, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.11851165443658829, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.10255352407693863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.06828252226114273, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.05948132649064064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.08501417934894562, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.07574266195297241, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.07043397426605225, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.055151309818029404, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.052313029766082764, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.04331948608160019, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.037098467350006104, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.03365637734532356, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.0327826589345932, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.022295977920293808, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.018836716189980507, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.018375476822257042, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.01650640368461609, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.01594032533466816, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.013081121258437634, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.01369952317327261, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.012057394720613956, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.010919157415628433, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.10255352407693863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.10255352407693863, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.q_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.0923425629734993, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08431018143892288, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.07691343128681183, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.06998924911022186, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.042826637625694275, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.03744270280003548, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.05773616582155228, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.05211072042584419, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.043833617120981216, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.03781874477863312, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03709028288722038, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.029447901993989944, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02494679018855095, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.02074086107313633, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.019630469381809235, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.014747964218258858, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.010983112268149853, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.010276470333337784, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.010012718848884106, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.009320969693362713, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00776763167232275, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.007680833805352449, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.006250523962080479, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.0052190194837749004, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.0923425629734993, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.0923425629734993, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.k_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08189938962459564, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.07444483041763306, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.06533430516719818, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.05961564928293228, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.03767747804522514, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.031609971076250076, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.05520181730389595, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.04918386787176132, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.03867245465517044, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.03335438296198845, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.03315602242946625, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.027982467785477638, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.02342839352786541, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.018325861543416977, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.01689782179892063, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.01404135674238205, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.009755920618772507, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.00888837967067957, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.008923296816647053, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.008049162104725838, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.007352439686655998, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.007137687876820564, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.005470838863402605, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.004777544178068638, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08189938962459564, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.08189938962459564, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.v_proj", + "numel": 4194304, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.18896484375, + "total_bits": 9181184.0, + "err": 0.20213909447193146, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.37646484375, + "total_bits": 9967616.0, + "err": 0.17918755114078522, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.62646484375, + "total_bits": 11016192.0, + "err": 0.16556887328624725, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.72021484375, + "total_bits": 11409408.0, + "err": 0.15021999180316925, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.22021484375, + "total_bits": 13506560.0, + "err": 0.0927625223994255, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.68896484375, + "total_bits": 15472640.0, + "err": 0.08124331384897232, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.0316162109375, + "total_bits": 12715520.0, + "err": 0.11954531818628311, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1076556071639061, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.18896484375, + "total_bits": 13375488.0, + "err": 0.09571897983551025, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.53271484375, + "total_bits": 14817280.0, + "err": 0.07976558804512024, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.656982421875, + "total_bits": 15338496.0, + "err": 0.07786509394645691, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.0316162109375, + "total_bits": 16909824.0, + "err": 0.061386313289403915, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.12646484375, + "total_bits": 17307648.0, + "err": 0.051505956798791885, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.22021484375, + "total_bits": 17700864.0, + "err": 0.044633351266384125, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.31396484375, + "total_bits": 18094080.0, + "err": 0.04286620765924454, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.0316162109375, + "total_bits": 21104128.0, + "err": 0.03078858181834221, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.22021484375, + "total_bits": 21895168.0, + "err": 0.02305855229496956, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.37646484375, + "total_bits": 22550528.0, + "err": 0.02178221009671688, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.53271484375, + "total_bits": 23205888.0, + "err": 0.02042827196419239, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.72021484375, + "total_bits": 23992320.0, + "err": 0.019277915358543396, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.0316162109375, + "total_bits": 25298432.0, + "err": 0.015856551006436348, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.12646484375, + "total_bits": 25696256.0, + "err": 0.015078328549861908, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.2191162109375, + "total_bits": 26084864.0, + "err": 0.013058601878583431, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.12646484375, + "total_bits": 34084864.0, + "err": 0.009536531753838062, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1076556071639061, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "1.0:3b 32g s4", + "bpw": 3.12646484375, + "total_bits": 13113344.0, + "err": 0.1076556071639061, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.self_attn.o_proj", + "numel": 16777216, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09675637632608414, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 39852032.0, + "err": 0.08497750014066696, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 44046336.0, + "err": 0.08119482547044754, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7191162109375, + "total_bits": 45619200.0, + "err": 0.07078921794891357, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2191162109375, + "total_bits": 54007808.0, + "err": 0.043865468353033066, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6878662109375, + "total_bits": 61872128.0, + "err": 0.03954734653234482, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 50857472.0, + "err": 0.052528265863657, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 52434944.0, + "err": 0.04607795178890228, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1878662109375, + "total_bits": 53483520.0, + "err": 0.04468345642089844, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5316162109375, + "total_bits": 59250688.0, + "err": 0.035504620522260666, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.65643310546875, + "total_bits": 61344768.0, + "err": 0.03474436700344086, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 67634688.0, + "err": 0.026327921077609062, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 69212160.0, + "err": 0.02383008413016796, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.2191162109375, + "total_bits": 70785024.0, + "err": 0.022857781499624252, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.3128662109375, + "total_bits": 72357888.0, + "err": 0.022644907236099243, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 84411904.0, + "err": 0.013966590166091919, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.2191162109375, + "total_bits": 87562240.0, + "err": 0.014332271181046963, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.3753662109375, + "total_bits": 90183680.0, + "err": 0.014138968661427498, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.5316162109375, + "total_bits": 92805120.0, + "err": 0.013082130812108517, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.7191162109375, + "total_bits": 95950848.0, + "err": 0.012979450635612011, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 101189120.0, + "err": 0.00868378859013319, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 102766592.0, + "err": 0.011625132523477077, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218841552734375, + "total_bits": 104334848.0, + "err": 0.008412836119532585, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 136321024.0, + "err": 0.010528252460062504, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09675637632608414, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1878662109375, + "total_bits": 36706304.0, + "err": 0.09675637632608414, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.gate_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.12037920951843262, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.113413505256176, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.1108781173825264, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.10088596493005753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.05693679675459862, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.0543363131582737, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.06392231583595276, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.05887763202190399, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.057469554245471954, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.051056183874607086, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.048978500068187714, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.03261282667517662, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.02821357548236847, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.02733665145933628, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.027131004258990288, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.016308443620800972, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.014127643778920174, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.013929376378655434, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.01293555460870266, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.012806203216314316, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.00859369058161974, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.008653255179524422, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.008298429660499096, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.005846256390213966, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.10088596493005753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.10088596493005753, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.up_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.0877724289894104, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3751046316964284, + "total_bits": 139466752.0, + "err": 0.08252569288015366, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6251046316964284, + "total_bits": 154146816.0, + "err": 0.08059944212436676, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7188546316964284, + "total_bits": 159651840.0, + "err": 0.07330089062452316, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2188546316964284, + "total_bits": 189011968.0, + "err": 0.041517700999975204, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.6876046316964284, + "total_bits": 216537088.0, + "err": 0.03956906497478485, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031276157924107, + "total_bits": 177997312.0, + "err": 0.046681687235832214, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1251046316964284, + "total_bits": 183506944.0, + "err": 0.042941488325595856, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1876046316964284, + "total_bits": 187176960.0, + "err": 0.041909363120794296, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5313546316964284, + "total_bits": 207362048.0, + "err": 0.03716200962662697, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6563023158482144, + "total_bits": 214699008.0, + "err": 0.035676777362823486, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031276157924107, + "total_bits": 236717567.99999997, + "err": 0.023900147527456284, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.125104631696429, + "total_bits": 242227200.0, + "err": 0.020655911415815353, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.218854631696429, + "total_bits": 247732224.0, + "err": 0.02001292072236538, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.312604631696429, + "total_bits": 253237248.0, + "err": 0.019855719059705734, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031276157924107, + "total_bits": 295437824.0, + "err": 0.01200788002461195, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.218854631696429, + "total_bits": 306452480.0, + "err": 0.010533078573644161, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.375104631696429, + "total_bits": 315627520.0, + "err": 0.01039483305066824, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.531354631696429, + "total_bits": 324802560.0, + "err": 0.00968555361032486, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.718854631696429, + "total_bits": 335812608.0, + "err": 0.009587543085217476, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031276157924107, + "total_bits": 354158080.0, + "err": 0.006532559171319008, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.125104631696429, + "total_bits": 359667712.0, + "err": 0.00672296853736043, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.218776157924107, + "total_bits": 365168128.0, + "err": 0.00632643373683095, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.125104631696429, + "total_bits": 477108224.0, + "err": 0.00486238906159997, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.0877724289894104, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1876046316964284, + "total_bits": 128456703.99999999, + "err": 0.0877724289894104, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + }, + { + "key": "model.layers.31.mlp.down_proj", + "numel": 58720256, + "options": [ + { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.07906506210565567, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:3b/0.75:2b 32g s4", + "bpw": 2.3753662109375, + "total_bits": 139482112.0, + "err": 0.06899664551019669, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.25:4b/0.75:2b 32g s4", + "bpw": 2.6253662109375, + "total_bits": 154162176.0, + "err": 0.06410200893878937, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", + "bpw": 2.7235804966517856, + "total_bits": 159929344.0, + "err": 0.055055394768714905, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.4, + 0.5 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:4b/0.9:3b 32g s4", + "bpw": 3.2235804966517856, + "total_bits": 189289472.0, + "err": 0.036659471690654755, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.2:6b/0.8:3b 32g s4", + "bpw": 3.7146519252232144, + "total_bits": 218125312.0, + "err": 0.03224587440490723, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 3 + ], + "bits_prop": [ + 0.2, + 0.8 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 128g s4", + "bpw": 3.031341552734375, + "total_bits": 178001152.0, + "err": 0.04642369598150253, + "qparams": { + "group_size": 128, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:3b 32g s4", + "bpw": 3.1253662109375, + "total_bits": 183522304.0, + "err": 0.04131557047367096, + "qparams": { + "group_size": 32, + "bits": [ + 3 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:4b/0.95:3b 32g s4", + "bpw": 3.1789376395089284, + "total_bits": 186668032.0, + "err": 0.037721648812294006, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:4b/0.6:3b 32g s4", + "bpw": 3.5271519252232144, + "total_bits": 207115264.0, + "err": 0.030040787532925606, + "qparams": { + "group_size": 32, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.6:4b/0.4:3b 64g s4", + "bpw": 3.6608973911830356, + "total_bits": 214968832.0, + "err": 0.028514672070741653, + "qparams": { + "group_size": 64, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.6, + 0.4 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 128g s4", + "bpw": 4.031341552734375, + "total_bits": 236721408.0, + "err": 0.023893175646662712, + "qparams": { + "group_size": 128, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:4b 32g s4", + "bpw": 4.1253662109375, + "total_bits": 242242560.0, + "err": 0.020277757197618484, + "qparams": { + "group_size": 32, + "bits": [ + 4 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:5b/0.9:4b 32g s4", + "bpw": 4.223580496651786, + "total_bits": 248009728.0, + "err": 0.01824031211435795, + "qparams": { + "group_size": 32, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:4b 32g s4", + "bpw": 4.321794782366071, + "total_bits": 253776896.0, + "err": 0.01770840212702751, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:5b 128g s4", + "bpw": 5.031341552734375, + "total_bits": 295441664.0, + "err": 0.012422889471054077, + "qparams": { + "group_size": 128, + "bits": [ + 5 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:6b/0.9:5b 32g s4", + "bpw": 5.223580496651786, + "total_bits": 306729984.0, + "err": 0.010433648712933064, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", + "bpw": 5.339651925223214, + "total_bits": 313545728.0, + "err": 0.010192292742431164, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.05, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.4:6b/0.6:5b 32g s4", + "bpw": 5.527151925223214, + "total_bits": 324555776.0, + "err": 0.009297442622482777, + "qparams": { + "group_size": 32, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.4, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", + "bpw": 5.723580496651786, + "total_bits": 336090112.0, + "err": 0.009000576101243496, + "qparams": { + "group_size": 32, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.3, + 0.6 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 128g s4", + "bpw": 6.031341552734375, + "total_bits": 354161920.0, + "err": 0.007414192892611027, + "qparams": { + "group_size": 128, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:6b 32g s4", + "bpw": 6.1253662109375, + "total_bits": 359683072.0, + "err": 0.007832088507711887, + "qparams": { + "group_size": 32, + "bits": [ + 6 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + }, + { + "desc": "0.1:8b/0.9:6b 128g s4", + "bpw": 6.227770124162946, + "total_bits": 365696256.0, + "err": 0.006785419303923845, + "qparams": { + "group_size": 128, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "desc": "1.0:8b 32g s4", + "bpw": 8.1253662109375, + "total_bits": 477123584.0, + "err": 0.006453216075897217, + "qparams": { + "group_size": 32, + "bits": [ + 8 + ], + "bits_prop": [ + 1.0 + ], + "scale_bits": 4 + } + } + ], + "best_option_max": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.07906506210565567, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + "best_option": { + "desc": "0.05:3b/0.95:2b 32g s4", + "bpw": 2.1789376395089284, + "total_bits": 127947775.99999999, + "err": 0.07906506210565567, + "qparams": { + "group_size": 32, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + } + } + ], + "last_module_idx": 66, + "base_perplexity": 9.084619366964484 +} \ No newline at end of file